@sanity/ailf 0.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377) hide show
  1. package/README.md +0 -1
  2. package/config/features.ts +23 -0
  3. package/config/models.ts +95 -0
  4. package/config/prompts.ts +16 -0
  5. package/config/rubrics.ts +225 -0
  6. package/config/schedules.ts +47 -0
  7. package/config/sinks.ts +37 -0
  8. package/config/sources.ts +21 -0
  9. package/config/thresholds.ts +61 -0
  10. package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
  11. package/dist/_vendor/ailf-core/config-helpers.js +170 -0
  12. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  13. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  14. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  15. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  16. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  17. package/dist/_vendor/ailf-core/index.js +5 -0
  18. package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
  19. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  20. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  21. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  22. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  23. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  24. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  25. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
  26. package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
  27. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
  28. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
  29. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
  30. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
  31. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  32. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  33. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  34. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  35. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  36. package/dist/_vendor/ailf-core/services/index.js +2 -1
  37. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  38. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  39. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  40. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  41. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  42. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  43. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  44. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  47. package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
  48. package/dist/_vendor/ailf-core/types/index.js +8 -1
  49. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
  50. package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
  51. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  52. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  53. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  54. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  55. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  56. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  57. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  58. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  59. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  60. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  61. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  62. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  63. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  64. package/dist/_vendor/ailf-shared/index.js +0 -1
  65. package/dist/adapters/api-client/build-request.js +14 -13
  66. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  67. package/dist/adapters/config-sources/file-config-adapter.js +39 -12
  68. package/dist/adapters/config-sources/index.d.ts +2 -0
  69. package/dist/adapters/config-sources/index.js +1 -0
  70. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  71. package/dist/adapters/config-sources/ts-config-loader.js +141 -0
  72. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  73. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  74. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  75. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  76. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  77. package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
  78. package/dist/adapters/task-sources/index.d.ts +3 -2
  79. package/dist/adapters/task-sources/index.js +3 -2
  80. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  81. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  82. package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
  83. package/dist/adapters/task-sources/repo-task-source.js +92 -80
  84. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  85. package/dist/adapters/task-sources/repo-validation.js +126 -5
  86. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  87. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  88. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  89. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  90. package/dist/cli.js +0 -2
  91. package/dist/commands/baseline.js +4 -1
  92. package/dist/commands/calculate-scores.js +1 -1
  93. package/dist/commands/coverage-audit.js +9 -1
  94. package/dist/commands/explain-handler.js +25 -23
  95. package/dist/commands/fetch-docs.js +3 -2
  96. package/dist/commands/generate-configs.js +1 -1
  97. package/dist/commands/init.d.ts +6 -4
  98. package/dist/commands/init.js +302 -23
  99. package/dist/commands/interactive.js +11 -7
  100. package/dist/commands/pipeline-action.d.ts +2 -0
  101. package/dist/commands/pipeline-action.js +16 -6
  102. package/dist/commands/pipeline.d.ts +1 -0
  103. package/dist/commands/pipeline.js +4 -2
  104. package/dist/commands/pr-comment.js +1 -1
  105. package/dist/commands/publish.js +2 -2
  106. package/dist/commands/readiness-report.js +13 -6
  107. package/dist/commands/validate-tasks.d.ts +2 -2
  108. package/dist/commands/validate-tasks.js +26 -15
  109. package/dist/composition-root.d.ts +13 -1
  110. package/dist/composition-root.js +99 -4
  111. package/dist/index.d.ts +41 -0
  112. package/dist/index.js +48 -0
  113. package/dist/orchestration/build-app-context.js +1 -0
  114. package/dist/orchestration/build-step-sequence.js +28 -8
  115. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  116. package/dist/orchestration/steps/fetch-docs-step.js +8 -7
  117. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  118. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  119. package/dist/orchestration/steps/generate-configs-step.js +261 -51
  120. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  121. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  122. package/dist/orchestration/steps/readiness-step.js +5 -6
  123. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  124. package/dist/orchestration/steps/run-eval-step.js +8 -7
  125. package/dist/pipeline/cache.d.ts +1 -1
  126. package/dist/pipeline/cache.js +36 -8
  127. package/dist/pipeline/calculate-scores.d.ts +2 -4
  128. package/dist/pipeline/calculate-scores.js +43 -113
  129. package/dist/pipeline/checks.js +2 -2
  130. package/dist/pipeline/compare.js +8 -8
  131. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  132. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  133. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  134. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  135. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  136. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  137. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  138. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  139. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  140. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
  141. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  142. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  143. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  144. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  145. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  146. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
  147. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  148. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  149. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  150. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  151. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  152. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  153. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  154. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  155. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  156. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  157. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  158. package/dist/pipeline/compiler/config-loader.js +111 -0
  159. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  160. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  161. package/dist/pipeline/compiler/hash.d.ts +11 -0
  162. package/dist/pipeline/compiler/hash.js +18 -0
  163. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  164. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  165. package/dist/pipeline/compiler/index.d.ts +29 -0
  166. package/dist/pipeline/compiler/index.js +45 -0
  167. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  168. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  169. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  170. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  171. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  172. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  173. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  174. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  175. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  176. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  177. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  178. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  179. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  180. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  181. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  182. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  183. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  184. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  185. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  186. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  187. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  188. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  189. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  190. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  191. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  192. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  193. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  194. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  195. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  196. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  197. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  198. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  199. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  200. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  201. package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
  202. package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
  203. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  204. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  205. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  206. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  207. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  208. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  209. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  210. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  211. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  212. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  213. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  214. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  215. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  216. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  217. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  218. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  219. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  220. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  221. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  222. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  223. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  224. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  225. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  226. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  227. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  228. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  229. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  230. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  231. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  232. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  233. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  234. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  235. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  241. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  242. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  251. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  252. package/dist/pipeline/compiler/preset-loader.js +99 -0
  253. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  254. package/dist/pipeline/compiler/presets/index.js +8 -0
  255. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
  256. package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
  257. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  258. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  259. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  260. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  261. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  262. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  263. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  264. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  265. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  266. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  267. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  268. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  269. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  270. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  271. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  272. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  273. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  274. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  275. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  276. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  277. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  278. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  279. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  280. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  281. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  282. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  283. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  284. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  285. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  286. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  287. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  288. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  289. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  290. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  291. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  292. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  293. package/dist/pipeline/coverage-audit.d.ts +15 -5
  294. package/dist/pipeline/coverage-audit.js +41 -22
  295. package/dist/pipeline/eval-constants.d.ts +16 -6
  296. package/dist/pipeline/eval-constants.js +25 -4
  297. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  298. package/dist/pipeline/eval-fingerprint.js +8 -9
  299. package/dist/pipeline/expand-tasks.d.ts +19 -10
  300. package/dist/pipeline/expand-tasks.js +34 -28
  301. package/dist/pipeline/gap-analysis.d.ts +1 -1
  302. package/dist/pipeline/gap-analysis.js +2 -2
  303. package/dist/pipeline/generate-configs.d.ts +22 -4
  304. package/dist/pipeline/generate-configs.js +53 -24
  305. package/dist/pipeline/grader-api.d.ts +3 -3
  306. package/dist/pipeline/grader-api.js +5 -12
  307. package/dist/pipeline/grader-compare-runner.js +20 -27
  308. package/dist/pipeline/grader-comparison.d.ts +4 -8
  309. package/dist/pipeline/grader-comparison.js +11 -17
  310. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  311. package/dist/pipeline/grader-consistency-runner.js +16 -20
  312. package/dist/pipeline/grader-consistency.d.ts +6 -10
  313. package/dist/pipeline/grader-consistency.js +13 -32
  314. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  315. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  316. package/dist/pipeline/grader-sensitivity.js +10 -10
  317. package/dist/pipeline/grader-validate-runner.js +7 -5
  318. package/dist/pipeline/grader-validation.d.ts +2 -6
  319. package/dist/pipeline/grader-validation.js +14 -22
  320. package/dist/pipeline/map-request-to-config.js +7 -1
  321. package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
  322. package/dist/pipeline/mirror-repo-tasks.js +22 -21
  323. package/dist/pipeline/normalize-mode.d.ts +49 -0
  324. package/dist/pipeline/normalize-mode.js +64 -0
  325. package/dist/pipeline/plan.d.ts +5 -2
  326. package/dist/pipeline/plan.js +134 -78
  327. package/dist/pipeline/pr-comment.js +2 -0
  328. package/dist/pipeline/profile-resolution.d.ts +22 -14
  329. package/dist/pipeline/profile-resolution.js +41 -19
  330. package/dist/pipeline/provenance.d.ts +2 -2
  331. package/dist/pipeline/provenance.js +12 -17
  332. package/dist/pipeline/release-report.js +4 -4
  333. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  334. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  335. package/dist/pipeline/rubric-loader.d.ts +20 -0
  336. package/dist/pipeline/rubric-loader.js +37 -0
  337. package/dist/pipeline/validate.d.ts +4 -4
  338. package/dist/pipeline/validate.js +64 -53
  339. package/dist/schedules/loader.js +18 -8
  340. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  341. package/dist/scripts/migrate-task-mode.js +85 -0
  342. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  343. package/dist/scripts/validate-task-sources.d.ts +1 -1
  344. package/dist/scripts/validate-task-sources.js +15 -15
  345. package/dist/sinks/loader.js +5 -7
  346. package/dist/sources.d.ts +7 -7
  347. package/dist/sources.js +22 -24
  348. package/dist/webhook/dispatch.js +2 -1
  349. package/package.json +15 -4
  350. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  351. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  352. package/tasks/literacy/frameworks.task.ts +128 -0
  353. package/tasks/literacy/functions.task.ts +69 -0
  354. package/tasks/literacy/groq.task.ts +258 -0
  355. package/tasks/literacy/nextjs-live.task.ts +75 -0
  356. package/tasks/literacy/studio-setup.task.ts +131 -0
  357. package/tasks/literacy/visual-editing.task.ts +146 -0
  358. package/config/features.yaml +0 -116
  359. package/config/models.yaml +0 -116
  360. package/config/prompts.yaml +0 -75
  361. package/config/rubrics.yaml +0 -81
  362. package/config/schedules.yaml +0 -43
  363. package/config/sinks.yaml +0 -54
  364. package/config/sources.yaml +0 -51
  365. package/config/thresholds.yaml +0 -49
  366. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  367. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  368. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  369. package/dist/_vendor/ailf-tasks/index.js +0 -16
  370. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  371. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  372. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  373. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  374. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  375. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  376. package/dist/agent-observer/test-imports.d.ts +0 -7
  377. package/dist/agent-observer/test-imports.js +0 -185
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Public types for the knowledge-probe mode handler.
3
+ */
4
+ import type { PromptfooPrompt, PromptfooProvider, PromptfooTestCase } from "../../promptfoo-compiler.js";
/**
 * Optional settings accepted when compiling a knowledge probe task.
 */
export interface KnowledgeProbeCompileOptions {
    /** Provider used for LLM-graded assertions. */
    graderProvider?: string;
    /** Model registry; knowledge probes run across all configured models. */
    models?: Array<{
        id: string;
        label: string;
        config?: Record<string, unknown>;
    }>;
}
/**
 * Output produced by compiling one knowledge probe task.
 */
export interface KnowledgeProbeCompileResult {
    /** Promptfoo provider configs, one per model. */
    providers: PromptfooProvider[];
    /** Test cases produced by compilation. */
    tests: PromptfooTestCase[];
    /** Prompts to evaluate. */
    prompts: PromptfooPrompt[];
    /** Mode metadata used for cross-model comparison. */
    metadata: KnowledgeProbeMetadata;
    /** Warnings raised while compiling. */
    warnings: string[];
}
/**
 * Metadata attached to knowledge probe results so they can be compared.
 */
export interface KnowledgeProbeMetadata {
    /** Evaluation mode identifier; always the literal "knowledge-probe". */
    mode: "knowledge-probe";
    /** Name of the probe strategy that was used. */
    probeStrategy: string;
    /** Marks that doc context was intentionally excluded; typed as the literal `true`. */
    noDocContext: true;
    /** Marks whether retrieval metrics apply; typed as the literal `false`. */
    retrievalMetrics: false;
}
/**
 * A single validation problem found in a knowledge probe task definition.
 */
export interface KnowledgeProbeValidationError {
    /** Name of the task field the problem relates to. */
    field: string;
    /** Human-readable description of the problem. */
    message: string;
}
@@ -0,0 +1,4 @@
1
+ /**
2
+ * Public types for the knowledge-probe mode handler.
3
+ */
4
+ export {};
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Validation logic for knowledge probe task definitions.
3
+ */
4
+ import type { KnowledgeProbeTaskDefinition } from "../../../../_vendor/ailf-core/index.d.ts";
5
+ import type { KnowledgeProbeValidationError } from "./types.js";
/**
 * Validate that a knowledge probe task definition has all required fields.
 *
 * @param task - Knowledge probe task definition to check.
 * @returns The validation errors found for the task.
 */
export declare function validateKnowledgeProbeTask(
    task: KnowledgeProbeTaskDefinition,
): KnowledgeProbeValidationError[];
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Validation logic for knowledge probe task definitions.
3
+ */
/**
 * Check a knowledge probe task definition for missing required fields.
 *
 * A task needs a truthy `id`, a truthy `title`, and a question to ask the
 * model, taken from `prompt.text`, `prompt.vars.task`, or `description`.
 *
 * @param task - Task definition to inspect.
 * @returns One `{ field, message }` entry per missing requirement, in the
 *   order id, title, prompt; an empty array when nothing is missing.
 */
export function validateKnowledgeProbeTask(task) {
    // Each requirement is evaluated in declaration order, then the unmet ones
    // are turned into error entries.
    const requirements = [
        {
            met: Boolean(task.id),
            field: "id",
            message: "Task ID is required",
        },
        {
            met: Boolean(task.title),
            field: "title",
            message: "Task title is required",
        },
        {
            // Any one of the three question sources is enough.
            met: Boolean(task.prompt?.text || task.prompt?.vars?.task || task.description),
            field: "prompt",
            message: "Knowledge probe tasks require either prompt.text, prompt.vars.task, " +
                "or description — the question to ask the model",
        },
    ];
    return requirements
        .filter((requirement) => !requirement.met)
        .map(({ field, message }) => ({ field, message }));
}
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Assertion resolution for literacy tasks.
3
+ *
4
+ * Handles rubric template resolution, doc-coverage auto-generation,
5
+ * and baseline assertion filtering.
6
+ */
7
+ import type { LiteracyTaskDefinition } from "../../../../_vendor/ailf-core/index.d.ts";
8
+ import type { PromptfooAssertion } from "../../assertion-mapper.js";
9
+ import type { LiteracyCompileOptions } from "./types.js";
/**
 * Resolve a literacy task's assertions into Promptfoo assertions.
 *
 * @param task - Literacy task whose `assertions` and `docCoverage` are read.
 * @param options - Optional compile options supplying the rubric config and
 *   grader provider.
 * @param warnings - Array handed on to rubric template resolution, which
 *   appends a message when a template cannot be resolved.
 * @returns The resolved assertions.
 */
export declare function resolveAssertions(
    task: LiteracyTaskDefinition,
    options: LiteracyCompileOptions | undefined,
    warnings: string[],
): PromptfooAssertion[];
/**
 * Build baseline assertions matching the legacy expand-tasks behavior.
 *
 * Documented rubric modes:
 * - "full": all assertions are carried over
 * - "abbreviated": only the first llm-rubric, with a shortened prompt
 * - "none": no assertions
 *
 * @param goldAssertions - Assertions the baseline set is derived from.
 * @param rubricMode - Which of the modes above to apply.
 * @returns The baseline assertions for the chosen mode.
 */
export declare function buildBaselineAssertions(
    goldAssertions: PromptfooAssertion[],
    rubricMode?: "abbreviated" | "full" | "none",
): PromptfooAssertion[];
@@ -0,0 +1,118 @@
1
+ /**
2
+ * Assertion resolution for literacy tasks.
3
+ *
4
+ * Handles rubric template resolution, doc-coverage auto-generation,
5
+ * and baseline assertion filtering.
6
+ */
7
+ // ---------------------------------------------------------------------------
8
+ // Assertion resolution
9
+ // ---------------------------------------------------------------------------
10
/**
 * Turn the assertion definitions on a literacy task into Promptfoo assertions.
 *
 * Templated `llm-rubric` entries are expanded through the rubric config. Every
 * other entry is copied across with its `value` / numeric `weight` when
 * present, and plain `llm-rubric` entries additionally receive the grader
 * provider. When the task sets `docCoverage`, a doc-coverage rubric is
 * appended if one can be built.
 *
 * @param task - Literacy task; `assertions` may be absent.
 * @param options - Compile options; `rubricConfig` and `graderProvider` are read.
 * @param warnings - Mutable list that receives template-resolution problems.
 * @returns The resolved assertions in definition order.
 */
export function resolveAssertions(task, options, warnings) {
    const rubricConfig = options?.rubricConfig;
    const graderProvider = options?.graderProvider;
    const resolved = [];
    for (const definition of task.assertions ?? []) {
        const isRubric = definition.type === "llm-rubric";
        if (isRubric && "template" in definition) {
            // Unresolvable templates are skipped; the helper records why.
            const expanded = resolveTemplatedAssertion(definition, rubricConfig, graderProvider, warnings);
            if (expanded)
                resolved.push(expanded);
            continue;
        }
        // Pass-through: keep only the fields Promptfoo needs, in a fixed key order.
        resolved.push(Object.assign({ type: definition.type }, "value" in definition ? { value: definition.value } : null, typeof definition.weight === "number" ? { weight: definition.weight } : null, isRubric && graderProvider ? { provider: graderProvider } : null));
    }
    // Doc-coverage auto-generation
    if (task.docCoverage) {
        const coverage = buildDocCoverageAssertion(rubricConfig, graderProvider);
        if (coverage)
            resolved.push(coverage);
    }
    return resolved;
}
39
+ // ---------------------------------------------------------------------------
40
+ // Rubric template resolution
41
+ // ---------------------------------------------------------------------------
42
+ function resolveTemplatedAssertion(a, rubricConfig, graderProvider, warnings) {
43
+ if (!rubricConfig) {
44
+ warnings.push(`No rubric config — template "${a.template}" cannot be resolved`);
45
+ return null;
46
+ }
47
+ const template = rubricConfig.templates[a.template];
48
+ if (!template) {
49
+ warnings.push(`Unknown rubric template: "${a.template}"`);
50
+ return null;
51
+ }
52
+ const scaleText = template.scale.map((s) => `- ${s}`).join("\n");
53
+ const criteriaText = a.criteria.map((c) => `- ${c}`).join("\n");
54
+ const rubricValue = `${template.header}\n${scaleText}\n\n` +
55
+ `${template.criteria_label ?? "Check for:"}\n${criteriaText}\n\n` +
56
+ `Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}`;
57
+ return {
58
+ type: "llm-rubric",
59
+ value: rubricValue,
60
+ ...(graderProvider ? { provider: graderProvider } : {}),
61
+ ...(template.dimension
62
+ ? { metadata: { dimension: template.dimension, maxScore: 100 } }
63
+ : {}),
64
+ };
65
+ }
66
+ // ---------------------------------------------------------------------------
67
+ // Doc-coverage assertion
68
+ // ---------------------------------------------------------------------------
69
+ function buildDocCoverageAssertion(rubricConfig, graderProvider) {
70
+ if (!rubricConfig?.templates["doc-coverage"])
71
+ return null;
72
+ const template = rubricConfig.templates["doc-coverage"];
73
+ const scaleText = template.scale.map((s) => `- ${s}`).join("\n");
74
+ const rubricValue = `${template.header}\n${scaleText}\n\n` +
75
+ `Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}`;
76
+ return {
77
+ type: "llm-rubric",
78
+ value: rubricValue,
79
+ ...(graderProvider ? { provider: graderProvider } : {}),
80
+ ...(template.dimension
81
+ ? { metadata: { dimension: template.dimension, maxScore: 100 } }
82
+ : {}),
83
+ };
84
+ }
85
+ // ---------------------------------------------------------------------------
86
+ // Baseline assertion filtering
87
+ // ---------------------------------------------------------------------------
88
+ /**
89
+ * Build baseline assertions matching the legacy expand-tasks behavior.
90
+ *
91
+ * - "full": all assertions carried over
92
+ * - "abbreviated": only first llm-rubric with shortened prompt
93
+ * - "none": no assertions
94
+ */
95
+ export function buildBaselineAssertions(goldAssertions, rubricMode) {
96
+ const mode = rubricMode ?? "full";
97
+ if (mode === "none")
98
+ return [];
99
+ if (mode === "full")
100
+ return [...goldAssertions];
101
+ // Abbreviated: keep first llm-rubric as summary, skip rest
102
+ const abbreviated = [];
103
+ let foundFirst = false;
104
+ for (const a of goldAssertions) {
105
+ if (a.type === "llm-rubric") {
106
+ if (!foundFirst) {
107
+ foundFirst = true;
108
+ abbreviated.push({
109
+ type: "llm-rubric",
110
+ value: "Score task completion from 0 to 100 (same criteria as above).\n" +
111
+ 'Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}',
112
+ ...(a.provider ? { provider: a.provider } : {}),
113
+ });
114
+ }
115
+ }
116
+ }
117
+ return abbreviated;
118
+ }
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Literacy task compilation — core compilation logic.
3
+ *
4
+ * Produces the same structure as the legacy expand-tasks.ts path:
5
+ * - Gold entry with with-docs prompt and canonical doc context
6
+ * - Baseline entry with without-docs prompt and empty docs
7
+ * - Rubric assertions with structured dimension metadata
8
+ */
9
+ import type { LiteracyTaskDefinition } from "../../../../_vendor/ailf-core/index.d.ts";
10
+ import type { LiteracyCompileOptions, LiteracyCompileResult } from "./types.js";
11
+ /**
12
+ * Compile a literacy task into Promptfoo configuration.
+ *
+ * The result bundles providers built from `options.models`, the prompts and
+ * test cases selected by `options.evalMode`, and a `warnings` list that
+ * carries validation and rubric-resolution problems found along the way.
13
+ */
14
+ export declare function compileLiteracyTask(task: LiteracyTaskDefinition, options?: LiteracyCompileOptions): LiteracyCompileResult;
@@ -0,0 +1,105 @@
1
+ /**
2
+ * Literacy task compilation — core compilation logic.
3
+ *
4
+ * Produces the same structure as the legacy expand-tasks.ts path:
5
+ * - Gold entry with with-docs prompt and canonical doc context
6
+ * - Baseline entry with without-docs prompt and empty docs
7
+ * - Rubric assertions with structured dimension metadata
8
+ */
9
+ import { LiteracyVariant, } from "../../../normalize-mode.js";
10
+ import { buildBaselineAssertions, resolveAssertions } from "./assertions.js";
11
+ import { LITERACY_PROMPT_TEMPLATES } from "./prompts.js";
12
+ import { validateLiteracyTask } from "./validation.js";
13
/**
 * Compile a literacy task into Promptfoo configuration.
 *
 * Validation problems do not stop compilation; each one is recorded as a
 * warning prefixed with the task id.
 *
 * @param task - Literacy task definition.
 * @param options - Optional compile options; `evalMode` defaults to
 *   `LiteracyVariant.STANDARD`.
 * @returns `{ providers, tests, prompts, warnings }`.
 */
export function compileLiteracyTask(task, options) {
    const evalMode = options?.evalMode ?? LiteracyVariant.STANDARD;
    // Validation findings seed the warnings list that later stages append to.
    const warnings = validateLiteracyTask(task).map(({ field, message }) => `Literacy task "${task.id}": ${field} — ${message}`);
    const providers = buildProviders(options);
    const prompts = buildPrompts(evalMode);
    const tests = buildTestCases(task, evalMode, options, warnings);
    return { providers, tests, prompts, warnings };
}
28
+ // ---------------------------------------------------------------------------
29
+ // Provider assembly
30
+ // ---------------------------------------------------------------------------
31
+ function buildProviders(options) {
32
+ if (options?.models && options.models.length > 0) {
33
+ return options.models.map((m) => ({
34
+ id: m.id,
35
+ label: m.label,
36
+ config: m.config,
37
+ }));
38
+ }
39
+ return [];
40
+ }
41
+ // ---------------------------------------------------------------------------
42
+ // Prompt assembly
43
+ // ---------------------------------------------------------------------------
44
/**
 * Convert a prompt template into the Promptfoo prompt shape.
 *
 * @param pt - Prompt template with `id`, `label` and `template` text.
 * @returns `{ id, label, raw }`, where `raw` is the template text.
 */
function templateToPromptfoo(pt) {
    const { id, label, template } = pt;
    return { id, label, raw: template };
}
47
/**
 * Select the prompts for an evaluation sub-mode.
 *
 * @param evalMode - "agentic" selects the agentic prompt alone; any other
 *   value selects the with-docs / without-docs pair, in that order.
 * @returns The selected templates converted to Promptfoo prompts.
 */
function buildPrompts(evalMode) {
    const templateIds = evalMode === "agentic" ? ["agentic"] : ["with-docs", "without-docs"];
    return templateIds.map((templateId) => templateToPromptfoo(LITERACY_PROMPT_TEMPLATES[templateId]));
}
56
+ // ---------------------------------------------------------------------------
57
+ // Test case assembly
58
+ // ---------------------------------------------------------------------------
59
/**
 * Assemble the Promptfoo test cases for one literacy task.
 *
 * Always emits a gold entry. Outside agentic mode, and unless the task sets
 * `baseline.enabled` to `false`, a baseline entry with empty docs follows.
 *
 * @param task - Literacy task definition.
 * @param evalMode - Evaluation sub-mode.
 * @param options - Compile options forwarded to assertion resolution.
 * @param warnings - Mutable list that receives assertion-resolution problems.
 * @returns The gold test case, optionally followed by the baseline test case.
 */
function buildTestCases(task, evalMode, options, warnings) {
    const promptText = task.prompt?.text ?? task.prompt?.template ?? "";
    const hasDocs = (task.context?.docs ?? []).length > 0;
    const featureArea = task.area ?? "";
    const title = task.title;
    const extraVars = task.prompt?.vars ?? {};
    const goldAssertions = resolveAssertions(task, options, warnings);
    // Task-supplied prompt vars are spread last, so they win over the defaults.
    const varsFor = (docs) => ({
        task: promptText,
        docs,
        __featureArea: featureArea,
        ...extraVars,
    });
    // Gold entry — canonical docs injected
    const gold = Object.assign({
        description: `${title} (gold)`,
        vars: varsFor(hasDocs ? `file://contexts/canonical/${task.id}.md` : ""),
    }, evalMode === LiteracyVariant.STANDARD ? { prompts: ["with-docs"] } : null, goldAssertions.length > 0 ? { assert: goldAssertions } : null);
    const cases = [gold];
    // Baseline entry — no docs (floor measurement)
    const wantsBaseline = evalMode !== "agentic" && task.baseline?.enabled !== false;
    if (wantsBaseline) {
        const baselineAssertions = buildBaselineAssertions(goldAssertions, task.baseline?.rubric);
        cases.push(Object.assign({
            description: `${title} (baseline)`,
            vars: varsFor(""),
            prompts: ["without-docs"],
        }, baselineAssertions.length > 0 ? { assert: baselineAssertions } : null));
    }
    return cases;
}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Literacy mode handler — compiles LiteracyTaskDefinition into Promptfoo config.
3
+ *
4
+ * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
5
+ */
6
+ import type { ModeHandler } from "../../../../_vendor/ailf-core/index.d.ts";
7
+ export { LITERACY_PROMPT_TEMPLATES } from "./prompts.js";
8
+ export { validateLiteracyTask, type LiteracyValidationError, } from "./validation.js";
9
+ export { compileLiteracyTask } from "./compiler.js";
10
+ export type { LiteracyCompileOptions, LiteracyCompileResult, RubricConfig, } from "./types.js";
11
/**
 * ModeHandler adapter for literacy tasks: `getPrompts()` returns
 * LITERACY_PROMPT_TEMPLATES and `compileTask()` delegates to
 * compileLiteracyTask, throwing when the task's mode is not "literacy".
 */
+ export declare const handler: ModeHandler;
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Literacy mode handler — compiles LiteracyTaskDefinition into Promptfoo config.
3
+ *
4
+ * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
5
+ */
6
+ import { compileLiteracyTask } from "./compiler.js";
7
+ import { LITERACY_PROMPT_TEMPLATES } from "./prompts.js";
8
+ // Re-export public API
9
+ export { LITERACY_PROMPT_TEMPLATES } from "./prompts.js";
10
+ export { validateLiteracyTask, } from "./validation.js";
11
+ export { compileLiteracyTask } from "./compiler.js";
12
+ // ---------------------------------------------------------------------------
13
+ // ModeHandler adapter — wraps compileLiteracyTask for registry dispatch
14
+ // ---------------------------------------------------------------------------
15
/**
 * ModeHandler adapter that lets the registry dispatch literacy tasks to
 * `compileLiteracyTask`.
 */
export const handler = {
    /** Return the literacy prompt templates. */
    getPrompts() {
        return LITERACY_PROMPT_TEMPLATES;
    },
    /**
     * Compile one task.
     *
     * @throws Error when `task.mode` is not "literacy".
     */
    compileTask(task, ctx) {
        if (task.mode !== "literacy") {
            throw new Error(`Literacy handler received task with mode "${task.mode}" — expected "literacy"`);
        }
        // Forward only the context fields the literacy compiler understands.
        const { graderProvider, rootDir, models, rubricConfig, evalMode } = ctx;
        const { providers, tests, prompts, warnings } = compileLiteracyTask(task, {
            graderProvider,
            rootDir,
            models,
            rubricConfig,
            evalMode,
        });
        return { providers, tests, prompts, warnings };
    },
};
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Canonical prompt templates for literacy-mode evaluations.
3
+ *
4
+ * These are the source-of-truth templates. Previously lived in
5
+ * config/prompts.ts as global templates; now handler-owned so
6
+ * non-literacy modes can define their own prompts without collision.
7
+ */
8
+ import type { PromptTemplate } from "../../../../_vendor/ailf-core/index.d.ts";
9
/**
 * Literacy prompt templates keyed by template id; the implementation defines
 * the keys "with-docs", "without-docs" and "agentic".
 */
+ export declare const LITERACY_PROMPT_TEMPLATES: Record<string, PromptTemplate>;
@@ -0,0 +1,74 @@
1
+ /**
2
+ * Canonical prompt templates for literacy-mode evaluations.
3
+ *
4
+ * These are the source-of-truth templates. Previously lived in
5
+ * config/prompts.ts as global templates; now handler-owned so
6
+ * non-literacy modes can define their own prompts without collision.
7
+ */
8
/**
 * Literacy-mode prompt templates, keyed by template id.
 *
 * - "with-docs": asks for an implementation using the supplied documentation;
 *   interpolates {{docs}} and {{task}}.
 * - "without-docs": the same request with no documentation section;
 *   interpolates {{task}} only.
 * - "agentic": tells the model to search for and read the documentation
 *   itself before implementing; interpolates {{task}} only.
 *
 * Each entry's `variables` array lists the placeholders its `template` uses.
 * The template text is sent to the model verbatim, so edits here change
 * evaluation behaviour.
 */
+ export const LITERACY_PROMPT_TEMPLATES = {
9
+ "with-docs": {
10
+ id: "with-docs",
11
+ label: "With Documentation",
12
+ template: `You are an expert Sanity.io developer. Use the following documentation to help implement the task.
13
+
14
+ ## Sanity Documentation
15
+ {{docs}}
16
+
17
+ ## Task
18
+ {{task}}
19
+
20
+ ## Requirements
21
+
22
+ 1. Use ONLY the APIs and patterns shown in the documentation
23
+ 2. Provide a complete, working implementation
24
+ 3. Include all necessary imports
25
+ 4. Follow Sanity best practices as documented
26
+
27
+ Provide your implementation:
28
+ `,
29
+ variables: ["docs", "task"],
30
+ },
31
+ "without-docs": {
32
+ id: "without-docs",
33
+ label: "Baseline (No Docs)",
34
+ template: `You are an expert Sanity.io developer.
35
+
36
+ ## Task
37
+ {{task}}
38
+
39
+ ## Requirements
40
+
41
+ 1. Provide a complete, working implementation
42
+ 2. Include all necessary imports
43
+ 3. Follow Sanity best practices
44
+
45
+ Provide your implementation:
46
+ `,
47
+ variables: ["task"],
48
+ },
49
+ agentic: {
50
+ id: "agentic",
51
+ label: "Agentic (self-retrieval)",
52
+ template: `You are an expert developer helping implement a Sanity.io feature.
53
+ You have access to web search and page fetching tools.
54
+
55
+ IMPORTANT: Before writing any code, search for and read the relevant
56
+ Sanity.io documentation to ensure you are using the latest APIs and
57
+ best practices. Do not rely on memory alone.
58
+
59
+ ## Task
60
+ {{task}}
61
+
62
+ ## Requirements
63
+
64
+ 1. Search for relevant Sanity documentation before implementing
65
+ 2. Use ONLY the APIs and patterns from the current official docs
66
+ 3. Provide a complete, working implementation
67
+ 4. Include all necessary imports
68
+ 5. Follow Sanity best practices as documented
69
+
70
+ Provide your implementation:
71
+ `,
72
+ variables: ["task"],
73
+ },
74
+ };
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Shared types for the literacy mode handler.
3
+ */
4
+ import type { PromptfooPrompt, PromptfooProvider, PromptfooTestCase } from "../../promptfoo-compiler.js";
5
+ /** Options for compiling a literacy task; every field is optional */
6
+ export interface LiteracyCompileOptions {
7
+ /** Grader provider for LLM-graded assertions; copied onto llm-rubric assertions as `provider` when set */
8
+ graderProvider?: string;
9
+ /** Root directory (for resolving file:// doc paths). NOTE(review): the mode handler forwards it, but no read of it is visible in the compiler code shipped alongside this type — confirm where it is consumed */
10
+ rootDir?: string;
11
+ /** Evaluation sub-mode — controls which entries are generated; the compiler falls back to LiteracyVariant.STANDARD when omitted */
12
+ evalMode?: import("../../../normalize-mode.js").LiteracyEvalSubMode;
13
+ /** Model providers to include; each becomes a Promptfoo provider ({ id, label, config }); omitted or empty yields no providers */
14
+ models?: {
15
+ id: string;
16
+ label: string;
17
+ config?: Record<string, unknown>;
18
+ }[];
19
+ /** Rubric config (templates, weights, profiles) — loaded from rubrics config; needed to expand templated llm-rubric assertions and the doc-coverage rubric */
20
+ rubricConfig?: RubricConfig;
21
+ }
22
+ /**
+ * Minimal rubric config needed by the handler.
+ *
+ * `templates` maps a template name to the pieces of a rubric prompt:
+ * `header` opens the prompt, `scale` is rendered as a bullet list beneath it,
+ * `criteria_label` introduces the criteria list (the compiler uses
+ * "Check for:" when it is absent), and `dimension`, when present, is copied
+ * into the assertion's metadata together with a maxScore of 100.
+ */
23
+ export interface RubricConfig {
24
+ templates: Record<string, {
25
+ dimension?: string;
26
+ header: string;
27
+ scale: string[];
28
+ criteria_label?: string;
29
+ }>;
30
+ }
31
+ /** Result of compiling a single literacy task */
32
+ export interface LiteracyCompileResult {
33
+ /** Promptfoo provider configs — one per configured model, empty when none were given */
34
+ providers: PromptfooProvider[];
35
+ /** Compiled test cases: the gold entry, plus a baseline entry outside agentic mode unless the task disables it */
36
+ tests: PromptfooTestCase[];
37
+ /** Prompts for evaluation — the agentic prompt alone in agentic mode, otherwise with-docs and without-docs */
38
+ prompts: PromptfooPrompt[];
39
+ /** Warnings generated during compilation (validation findings and rubric-resolution problems) */
40
+ warnings: string[];
41
+ }
@@ -0,0 +1,4 @@
1
+ /**
2
+ * Shared types for the literacy mode handler.
3
+ */
4
+ export {};
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Validation for literacy task definitions.
3
+ */
4
+ import type { LiteracyTaskDefinition } from "../../../../_vendor/ailf-core/index.d.ts";
5
/**
 * One problem found by validateLiteracyTask: `field` names the task property
 * at fault ("id", "title" or "prompt") and `message` describes the problem.
 */
+ export interface LiteracyValidationError {
6
+ field: string;
7
+ message: string;
8
+ }
9
+ /**
10
+ * Validate a literacy task definition.
+ *
+ * Reports a missing `id`, a missing `title`, and a missing prompt, where the
+ * prompt is taken as prompt.text ?? prompt.template ?? prompt.vars.task.
+ * Returns an empty array when nothing is missing; it does not throw for
+ * these conditions.
11
+ */
12
+ export declare function validateLiteracyTask(task: LiteracyTaskDefinition): LiteracyValidationError[];
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Validation for literacy task definitions.
3
+ */
4
/**
 * Validate a literacy task definition.
 *
 * @param task - Task definition to inspect.
 * @returns One `{ field, message }` entry per missing requirement, in the
 *   order id, title, prompt; an empty array when the task is complete.
 */
export function validateLiteracyTask(task) {
    const prompt = task.prompt;
    // First defined source wins; an empty string here counts as "no prompt".
    const promptText = prompt?.text ?? prompt?.template ?? prompt?.vars?.task ?? "";
    const requirements = [
        ["id", task.id, "Task ID is required"],
        ["title", task.title, "Task title is required"],
        ["prompt", promptText, "Task prompt text is required"],
    ];
    return requirements
        .filter(([, value]) => !value)
        .map(([field, , message]) => ({ field, message }));
}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * MCP-specific assertion types — ergonomic assertions for MCP server testing.
3
+ *
4
+ * Each assertion type compiles down to a Promptfoo `javascript` assertion
5
+ * with the appropriate validation logic. The developer writes:
6
+ *
7
+ * ```typescript
8
+ * assertions: [
9
+ * { type: "tool-called", value: "getDocument" },
10
+ * { type: "tool-input-matches", value: { documentId: "doc-123" } },
11
+ * { type: "tool-output-matches", value: { title: "Hello" } },
12
+ * { type: "error-returned", value: { code: -32602 } },
13
+ * ]
14
+ * ```
15
+ *
16
+ * The compiler transforms these into Promptfoo-compatible `javascript`
17
+ * assertions that inspect the tool call trace in the evaluation output.
18
+ *
19
+ * @see docs/exec-plans/architecture-overhaul/phase-3-mcp-server-mode.md
20
+ */
21
+ import type { PromptfooAssertion } from "../../assertion-mapper.js";
22
+ import type { MCPAssertionContext } from "./types.js";
23
+ /**
+ * An AILF assertion definition — accepts both core and generalized types.
+ *
+ * Module-private: it only types the `assertions` argument of
+ * buildMCPAssertions. `type` is a plain string, so both the MCP-specific
+ * names (e.g. "tool-called") and standard Promptfoo names are accepted at
+ * the type level.
+ */
24
+ interface AssertionInput {
25
+ type: string;
26
+ value?: unknown;
27
+ weight?: number;
28
+ /** Allow additional properties from generalized assertions (values are `unknown`, so narrow before use) */
29
+ [key: string]: unknown;
30
+ }
31
+ /**
32
+ * Build MCP-specific assertions from task assertion definitions.
33
+ *
34
+ * Handles both MCP-specific types (tool-called, tool-input-matches, etc.)
35
+ * and standard assertion types (contains, llm-rubric, etc.) which are
36
+ * passed through unchanged.
+ *
+ * Returns the compiled Promptfoo assertions together with a `warnings` list.
+ * NOTE(review): the implementation is not visible from this declaration —
+ * presumably `warnings` reports definitions that could not be compiled;
+ * confirm against the implementation.
37
+ */
38
+ export declare function buildMCPAssertions(assertions: AssertionInput[], context: MCPAssertionContext): {
39
+ assertions: PromptfooAssertion[];
40
+ warnings: string[];
41
+ };
42
+ export {};