@sanity/ailf 0.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/README.md +0 -1
  2. package/config/features.ts +23 -0
  3. package/config/models.ts +95 -0
  4. package/config/prompts.ts +16 -0
  5. package/config/rubrics.ts +225 -0
  6. package/config/schedules.ts +47 -0
  7. package/config/sinks.ts +37 -0
  8. package/config/sources.ts +21 -0
  9. package/config/thresholds.ts +61 -0
  10. package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
  11. package/dist/_vendor/ailf-core/config-helpers.js +170 -0
  12. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  13. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  14. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  15. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  16. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  17. package/dist/_vendor/ailf-core/index.js +5 -0
  18. package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
  19. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  20. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  21. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  22. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  23. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  24. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  25. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
  26. package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
  27. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
  28. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
  29. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
  30. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
  31. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  32. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  33. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  34. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  35. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  36. package/dist/_vendor/ailf-core/services/index.js +2 -1
  37. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  38. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  39. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  40. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  41. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  42. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  43. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  44. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  47. package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
  48. package/dist/_vendor/ailf-core/types/index.js +8 -1
  49. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
  50. package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
  51. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  52. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  53. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  54. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  55. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  56. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  57. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  58. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  59. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  60. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  61. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  62. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  63. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  64. package/dist/_vendor/ailf-shared/index.js +0 -1
  65. package/dist/adapters/api-client/build-request.js +14 -13
  66. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  67. package/dist/adapters/config-sources/file-config-adapter.js +39 -12
  68. package/dist/adapters/config-sources/index.d.ts +2 -0
  69. package/dist/adapters/config-sources/index.js +1 -0
  70. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  71. package/dist/adapters/config-sources/ts-config-loader.js +141 -0
  72. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  73. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  74. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  75. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  76. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  77. package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
  78. package/dist/adapters/task-sources/index.d.ts +3 -2
  79. package/dist/adapters/task-sources/index.js +3 -2
  80. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  81. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  82. package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
  83. package/dist/adapters/task-sources/repo-task-source.js +92 -80
  84. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  85. package/dist/adapters/task-sources/repo-validation.js +126 -5
  86. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  87. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  88. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  89. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  90. package/dist/cli.js +0 -2
  91. package/dist/commands/baseline.js +4 -1
  92. package/dist/commands/calculate-scores.js +1 -1
  93. package/dist/commands/coverage-audit.js +9 -1
  94. package/dist/commands/explain-handler.js +25 -23
  95. package/dist/commands/fetch-docs.js +3 -2
  96. package/dist/commands/generate-configs.js +1 -1
  97. package/dist/commands/init.d.ts +6 -4
  98. package/dist/commands/init.js +302 -23
  99. package/dist/commands/interactive.js +11 -7
  100. package/dist/commands/pipeline-action.d.ts +2 -0
  101. package/dist/commands/pipeline-action.js +16 -6
  102. package/dist/commands/pipeline.d.ts +1 -0
  103. package/dist/commands/pipeline.js +4 -2
  104. package/dist/commands/pr-comment.js +1 -1
  105. package/dist/commands/publish.js +2 -2
  106. package/dist/commands/readiness-report.js +13 -6
  107. package/dist/commands/validate-tasks.d.ts +2 -2
  108. package/dist/commands/validate-tasks.js +26 -15
  109. package/dist/composition-root.d.ts +13 -1
  110. package/dist/composition-root.js +99 -4
  111. package/dist/index.d.ts +41 -0
  112. package/dist/index.js +48 -0
  113. package/dist/orchestration/build-app-context.js +1 -0
  114. package/dist/orchestration/build-step-sequence.js +28 -8
  115. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  116. package/dist/orchestration/steps/fetch-docs-step.js +8 -7
  117. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  118. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  119. package/dist/orchestration/steps/generate-configs-step.js +261 -51
  120. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  121. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  122. package/dist/orchestration/steps/readiness-step.js +5 -6
  123. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  124. package/dist/orchestration/steps/run-eval-step.js +8 -7
  125. package/dist/pipeline/cache.d.ts +1 -1
  126. package/dist/pipeline/cache.js +36 -8
  127. package/dist/pipeline/calculate-scores.d.ts +2 -4
  128. package/dist/pipeline/calculate-scores.js +43 -113
  129. package/dist/pipeline/checks.js +2 -2
  130. package/dist/pipeline/compare.js +8 -8
  131. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  132. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  133. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  134. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  135. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  136. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  137. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  138. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  139. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  140. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
  141. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  142. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  143. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  144. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  145. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  146. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
  147. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  148. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  149. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  150. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  151. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  152. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  153. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  154. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  155. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  156. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  157. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  158. package/dist/pipeline/compiler/config-loader.js +111 -0
  159. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  160. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  161. package/dist/pipeline/compiler/hash.d.ts +11 -0
  162. package/dist/pipeline/compiler/hash.js +18 -0
  163. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  164. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  165. package/dist/pipeline/compiler/index.d.ts +29 -0
  166. package/dist/pipeline/compiler/index.js +45 -0
  167. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  168. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  169. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  170. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  171. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  172. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  173. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  174. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  175. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  176. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  177. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  178. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  179. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  180. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  181. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  182. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  183. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  184. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  185. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  186. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  187. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  188. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  189. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  190. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  191. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  192. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  193. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  194. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  195. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  196. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  197. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  198. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  199. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  200. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  201. package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
  202. package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
  203. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  204. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  205. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  206. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  207. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  208. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  209. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  210. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  211. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  212. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  213. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  214. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  215. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  216. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  217. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  218. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  219. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  220. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  221. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  222. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  223. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  224. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  225. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  226. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  227. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  228. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  229. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  230. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  231. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  232. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  233. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  234. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  235. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  241. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  242. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  251. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  252. package/dist/pipeline/compiler/preset-loader.js +99 -0
  253. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  254. package/dist/pipeline/compiler/presets/index.js +8 -0
  255. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
  256. package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
  257. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  258. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  259. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  260. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  261. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  262. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  263. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  264. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  265. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  266. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  267. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  268. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  269. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  270. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  271. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  272. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  273. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  274. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  275. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  276. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  277. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  278. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  279. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  280. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  281. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  282. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  283. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  284. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  285. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  286. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  287. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  288. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  289. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  290. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  291. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  292. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  293. package/dist/pipeline/coverage-audit.d.ts +15 -5
  294. package/dist/pipeline/coverage-audit.js +41 -22
  295. package/dist/pipeline/eval-constants.d.ts +16 -6
  296. package/dist/pipeline/eval-constants.js +25 -4
  297. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  298. package/dist/pipeline/eval-fingerprint.js +8 -9
  299. package/dist/pipeline/expand-tasks.d.ts +19 -10
  300. package/dist/pipeline/expand-tasks.js +34 -28
  301. package/dist/pipeline/gap-analysis.d.ts +1 -1
  302. package/dist/pipeline/gap-analysis.js +2 -2
  303. package/dist/pipeline/generate-configs.d.ts +22 -4
  304. package/dist/pipeline/generate-configs.js +53 -24
  305. package/dist/pipeline/grader-api.d.ts +3 -3
  306. package/dist/pipeline/grader-api.js +5 -12
  307. package/dist/pipeline/grader-compare-runner.js +20 -27
  308. package/dist/pipeline/grader-comparison.d.ts +4 -8
  309. package/dist/pipeline/grader-comparison.js +11 -17
  310. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  311. package/dist/pipeline/grader-consistency-runner.js +16 -20
  312. package/dist/pipeline/grader-consistency.d.ts +6 -10
  313. package/dist/pipeline/grader-consistency.js +13 -32
  314. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  315. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  316. package/dist/pipeline/grader-sensitivity.js +10 -10
  317. package/dist/pipeline/grader-validate-runner.js +7 -5
  318. package/dist/pipeline/grader-validation.d.ts +2 -6
  319. package/dist/pipeline/grader-validation.js +14 -22
  320. package/dist/pipeline/map-request-to-config.js +7 -1
  321. package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
  322. package/dist/pipeline/mirror-repo-tasks.js +22 -21
  323. package/dist/pipeline/normalize-mode.d.ts +49 -0
  324. package/dist/pipeline/normalize-mode.js +64 -0
  325. package/dist/pipeline/plan.d.ts +5 -2
  326. package/dist/pipeline/plan.js +134 -78
  327. package/dist/pipeline/pr-comment.js +2 -0
  328. package/dist/pipeline/profile-resolution.d.ts +22 -14
  329. package/dist/pipeline/profile-resolution.js +41 -19
  330. package/dist/pipeline/provenance.d.ts +2 -2
  331. package/dist/pipeline/provenance.js +12 -17
  332. package/dist/pipeline/release-report.js +4 -4
  333. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  334. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  335. package/dist/pipeline/rubric-loader.d.ts +20 -0
  336. package/dist/pipeline/rubric-loader.js +37 -0
  337. package/dist/pipeline/validate.d.ts +4 -4
  338. package/dist/pipeline/validate.js +64 -53
  339. package/dist/schedules/loader.js +18 -8
  340. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  341. package/dist/scripts/migrate-task-mode.js +85 -0
  342. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  343. package/dist/scripts/validate-task-sources.d.ts +1 -1
  344. package/dist/scripts/validate-task-sources.js +15 -15
  345. package/dist/sinks/loader.js +5 -7
  346. package/dist/sources.d.ts +7 -7
  347. package/dist/sources.js +22 -24
  348. package/dist/webhook/dispatch.js +2 -1
  349. package/package.json +15 -4
  350. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  351. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  352. package/tasks/literacy/frameworks.task.ts +128 -0
  353. package/tasks/literacy/functions.task.ts +69 -0
  354. package/tasks/literacy/groq.task.ts +258 -0
  355. package/tasks/literacy/nextjs-live.task.ts +75 -0
  356. package/tasks/literacy/studio-setup.task.ts +131 -0
  357. package/tasks/literacy/visual-editing.task.ts +146 -0
  358. package/config/features.yaml +0 -116
  359. package/config/models.yaml +0 -116
  360. package/config/prompts.yaml +0 -75
  361. package/config/rubrics.yaml +0 -81
  362. package/config/schedules.yaml +0 -43
  363. package/config/sinks.yaml +0 -54
  364. package/config/sources.yaml +0 -51
  365. package/config/thresholds.yaml +0 -49
  366. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  367. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  368. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  369. package/dist/_vendor/ailf-tasks/index.js +0 -16
  370. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  371. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  372. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  373. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  374. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  375. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  376. package/dist/agent-observer/test-imports.d.ts +0 -7
  377. package/dist/agent-observer/test-imports.js +0 -185
@@ -0,0 +1,392 @@
1
+ /**
2
+ * mcp-server-handler.test.ts — Tests for MCP server mode compilation.
3
+ *
4
+ * Tests the full MCP compilation pipeline: task validation, provider
5
+ * assembly, assertion mapping, test case generation, and end-to-end
6
+ * compilation of example tasks.
7
+ *
8
+ * Run: npx tsx --test src/pipeline/compiler/__tests__/mcp-server-handler.test.ts
9
+ */
10
+ import assert from "node:assert/strict";
11
+ import { describe, it } from "node:test";
12
+ import { LiteracyVariant } from "../../normalize-mode.js";
13
+ import { buildMCPAssertions, compileMCPTask, handler as mcpHandler, MCP_PROMPT_TEMPLATES, validateMCPTask, } from "../mode-handlers/mcp-server/index.js";
14
+ import { allMCPExampleTasks, createAndPublishTask, inspectSchemaTask, queryDocumentsTask, semanticSearchTask, stdioServerTask, } from "../mode-handlers/__fixtures__/mcp-example-tasks.js";
15
+ // ---------------------------------------------------------------------------
16
+ // Helpers
17
+ // ---------------------------------------------------------------------------
18
/**
 * Build a minimal MCP-server task fixture for the tests in this file.
 *
 * @param {object} [overrides] - Fields merged over the defaults. An override
 *   wins whenever it names a key the defaults also define; omitting the
 *   argument yields the defaults unchanged.
 * @returns {object} A freshly created task object on every call.
 */
function makeMinimalMCPTask(overrides) {
    return {
        mode: "mcp-server",
        id: "test-mcp-task",
        title: "Test MCP Task",
        description: "A test MCP server evaluation task",
        area: "mcp-server",
        // Spread last so a test can replace any default, e.g. `{ id: "" }`.
        ...overrides,
    };
}
28
/**
 * Test models for compilation — simulates models from the registry.
 * Each entry carries a provider-qualified `id`, a display `label`, and a
 * per-model `config` object.
 */
const TEST_MODELS = [
    {
        id: "anthropic:messages:claude-opus-4-6",
        label: "Claude Opus 4.6",
        config: { temperature: 0.2 },
    },
    {
        id: "openai:responses:gpt-5.4",
        label: "GPT 5.4",
        config: { reasoning_effort: "medium" },
    },
];

/** The custom MCP provider file:// path that compiled providers are compared against. */
const MCP_PROVIDER_PATH = "file://dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js";
43
/**
 * Helper to get a provider's config.
 *
 * @param {object} provider - A provider entry; must not be null/undefined.
 * @returns {*} The value of `provider.config` (undefined when absent).
 */
function cfg(provider) {
    return provider.config;
}

/**
 * Helper to get the `mcpServer` sub-config from a provider.
 *
 * @param {object} provider - A provider entry; must not be null/undefined.
 * @returns {*} `provider.config.mcpServer`, or undefined when the provider
 *   has no config (the optional chain avoids a throw in that case).
 */
function serverCfg(provider) {
    return cfg(provider)?.mcpServer;
}
51
+ // ---------------------------------------------------------------------------
52
+ // handler.getPrompts() — prompt template ownership
53
+ // ---------------------------------------------------------------------------
54
/**
 * handler.getPrompts() — prompt template ownership.
 *
 * Checks that the MCP handler returns its own templates (keyed by
 * "mcp-server" rather than the literacy keys), that the template has the
 * expected shape, and that the exported MCP_PROMPT_TEMPLATES constant is
 * deep-equal to what the handler returns.
 */
describe("MCPServerHandler.getPrompts", () => {
    it("returns prompt templates", () => {
        const prompts = mcpHandler.getPrompts();
        assert.ok(prompts, "getPrompts() should return a record");
        assert.ok(Object.keys(prompts).length > 0, "should return at least one template");
    });

    it("returns templates keyed by MCP-specific IDs (not literacy names)", () => {
        const prompts = mcpHandler.getPrompts();
        const keys = Object.keys(prompts);
        assert.ok(!keys.includes("with-docs"), "should not use literacy key 'with-docs'");
        assert.ok(!keys.includes("without-docs"), "should not use literacy key 'without-docs'");
        assert.ok(!keys.includes(LiteracyVariant.AGENTIC), "should not use literacy key 'agentic'");
        assert.ok(keys.includes("mcp-server"), "should include 'mcp-server' template");
    });

    it("mcp-server template instructs model to use MCP tools", () => {
        const prompts = mcpHandler.getPrompts();
        const template = prompts["mcp-server"];
        assert.ok(template, "mcp-server template should exist");
        assert.ok(template.template.includes("{{task}}"), "should include {{task}} placeholder");
        assert.ok(/tool/i.test(template.template), "template should mention tools (MCP-appropriate content)");
    });

    it("template has correct PromptTemplate shape", () => {
        const prompts = mcpHandler.getPrompts();
        const template = prompts["mcp-server"];
        // Guard first so a missing template fails as a labelled assertion
        // instead of a TypeError on the property reads below.
        assert.ok(template, "mcp-server template should exist");
        assert.equal(template.id, "mcp-server");
        assert.ok(template.label, "should have a human-readable label");
        assert.ok(template.template, "should have a template string");
        assert.ok(Array.isArray(template.variables), "should declare variables");
        assert.ok(template.variables.includes("task"), "variables should include 'task'");
    });

    it("exported MCP_PROMPT_TEMPLATES matches handler.getPrompts()", () => {
        const fromHandler = mcpHandler.getPrompts();
        assert.deepEqual(fromHandler, MCP_PROMPT_TEMPLATES);
    });
});
89
+ // ---------------------------------------------------------------------------
90
+ // validateMCPTask
91
+ // ---------------------------------------------------------------------------
92
/**
 * validateMCPTask — task-level validation.
 *
 * Each case builds a task with makeMinimalMCPTask(), runs validateMCPTask()
 * on it, and checks either that no errors come back or that at least one
 * error is reported against the expected `field`.
 */
describe("validateMCPTask", () => {
    // Both helpers keep the original pass/fail conditions and only add a
    // message, so a failing run shows which errors were actually returned.
    const assertNoErrors = (errors) => {
        assert.equal(errors.length, 0, `expected no validation errors, got: ${JSON.stringify(errors)}`);
    };
    const assertFieldError = (errors, field) => {
        assert.ok(
            errors.some((e) => e.field === field),
            `expected an error on "${field}", got: ${JSON.stringify(errors)}`,
        );
    };

    it("passes for a valid minimal task", () => {
        const errors = validateMCPTask(makeMinimalMCPTask());
        assertNoErrors(errors);
    });

    it("errors on missing ID", () => {
        const errors = validateMCPTask(makeMinimalMCPTask({ id: "" }));
        assertFieldError(errors, "id");
    });

    it("errors on missing title", () => {
        const errors = validateMCPTask(makeMinimalMCPTask({ title: "" }));
        assertFieldError(errors, "title");
    });

    it("errors on stdio transport without command", () => {
        const errors = validateMCPTask(makeMinimalMCPTask({
            serverConfig: { transport: "stdio" },
        }));
        assertFieldError(errors, "serverConfig.command");
    });

    it("errors on sse transport without url", () => {
        const errors = validateMCPTask(makeMinimalMCPTask({
            serverConfig: { transport: "sse" },
        }));
        assertFieldError(errors, "serverConfig.url");
    });

    it("passes for valid stdio config", () => {
        const errors = validateMCPTask(makeMinimalMCPTask({
            serverConfig: {
                transport: "stdio",
                command: "node dist/server.js",
            },
        }));
        assertNoErrors(errors);
    });

    it("passes for valid sse config", () => {
        const errors = validateMCPTask(makeMinimalMCPTask({
            serverConfig: {
                transport: "sse",
                url: "http://localhost:3000/sse",
            },
        }));
        assertNoErrors(errors);
    });

    it("errors on tool-called without value", () => {
        const errors = validateMCPTask(makeMinimalMCPTask({
            assertions: [{ type: "tool-called" }],
        }));
        assertFieldError(errors, "assertions");
    });
});
142
+ // ---------------------------------------------------------------------------
143
+ // compileMCPTask — provider assembly
144
+ // ---------------------------------------------------------------------------
145
// Compilation suite: checks the providers, prompts, test cases and warnings
// that compileMCPTask returns for tasks built with makeMinimalMCPTask.
describe("compileMCPTask", () => {
    // Builds a fresh options object on every call so no two compilations
    // share the same options instance.
    const withTestModels = () => ({ models: TEST_MODELS });

    it("produces provider, tests, and prompts", () => {
        const compiled = compileMCPTask(makeMinimalMCPTask(), withTestModels());
        assert.ok(compiled.providers.length > 0, "Should produce providers");
        assert.ok(compiled.tests.length > 0, "Should produce test cases");
        assert.ok(compiled.prompts.length > 0, "Should produce prompts");
    });

    it("emits file:// providers using the custom MCP tool provider", () => {
        const stdioTask = makeMinimalMCPTask({
            serverConfig: {
                transport: "stdio",
                command: "node dist/server.js --flag",
            },
        });
        const { providers } = compileMCPTask(stdioTask, withTestModels());
        assert.equal(providers.length, 2, "One provider per model");
        // Every provider points at the custom MCP tool provider path.
        assert.equal(providers[0].id, MCP_PROVIDER_PATH);
        assert.equal(providers[1].id, MCP_PROVIDER_PATH);
        // The model identifier travels inside the provider config.
        assert.equal(cfg(providers[0]).model, "anthropic:messages:claude-opus-4-6");
        assert.equal(cfg(providers[1]).model, "openai:responses:gpt-5.4");
        // The MCP server settings live under config.mcpServer.
        const mcpServer = serverCfg(providers[0]);
        assert.equal(mcpServer.command, "node dist/server.js --flag");
    });

    it("preserves model config in provider config", () => {
        const sseTask = makeMinimalMCPTask({
            serverConfig: { transport: "sse", url: "http://localhost:3000/sse" },
        });
        const { providers } = compileMCPTask(sseTask, withTestModels());
        const providerConfig = cfg(providers[0]);
        assert.equal(providerConfig.temperature, 0.2, "Model config preserved");
        assert.ok(providerConfig.mcpServer, "MCP server config present");
        assert.equal(providerConfig.maxToolRounds, 5, "Default maxToolRounds");
    });

    it("builds MCP server config for URL-based transport", () => {
        const sseTask = makeMinimalMCPTask({
            serverConfig: { transport: "sse", url: "http://localhost:3000/sse" },
        });
        const { providers } = compileMCPTask(sseTask, withTestModels());
        const mcpServer = serverCfg(providers[0]);
        assert.equal(mcpServer.url, "http://localhost:3000/sse");
    });

    it("maps auth config to mcpServer config", () => {
        const authedTask = makeMinimalMCPTask({
            serverConfig: {
                transport: "streamable-http",
                url: "https://mcp.example.com",
                auth: { type: "bearer", token: "{{env.MY_TOKEN}}" },
            },
        });
        const { providers } = compileMCPTask(authedTask, withTestModels());
        const mcpServer = serverCfg(providers[0]);
        assert.deepEqual(mcpServer.auth, { type: "bearer", token: "{{env.MY_TOKEN}}" });
    });

    it("maps capabilities to mcpTools config", () => {
        const capableTask = makeMinimalMCPTask({
            capabilities: ["query_documents", "get_schema"],
            serverConfig: {
                transport: "streamable-http",
                url: "https://mcp.example.com",
            },
        });
        const { providers } = compileMCPTask(capableTask, withTestModels());
        const expectedTools = ["query_documents", "get_schema"];
        assert.deepEqual(cfg(providers[0]).mcpTools, expectedTools);
    });

    it("uses task-level models override when specified", () => {
        const overridingTask = makeMinimalMCPTask({
            models: ["anthropic:messages:claude-sonnet-4-20250514"],
            serverConfig: { transport: "sse", url: "http://localhost:3000" },
        });
        const { providers } = compileMCPTask(overridingTask, withTestModels());
        assert.equal(providers.length, 1);
        assert.equal(cfg(providers[0]).model, "anthropic:messages:claude-sonnet-4-20250514");
    });

    it("respects task-level maxToolRounds", () => {
        const boundedTask = makeMinimalMCPTask({ maxToolRounds: 10 });
        const { providers } = compileMCPTask(boundedTask, withTestModels());
        assert.equal(cfg(providers[0]).maxToolRounds, 10);
    });

    it("falls back to default model when no models provided", () => {
        // No options argument at all: the compiler must still emit a provider
        // and mention the missing models in its warnings.
        const compiled = compileMCPTask(makeMinimalMCPTask());
        assert.ok(compiled.providers.length > 0, "Should have a fallback provider");
        assert.ok(compiled.warnings.some((note) => note.includes("no models")));
    });

    it("uses task description as prompt text", () => {
        const describedTask = makeMinimalMCPTask({
            description: "Test the getDocument tool",
        });
        const { prompts } = compileMCPTask(describedTask);
        assert.equal(prompts[0].raw, "Test the getDocument tool");
    });

    it("prefers prompt.text over description", () => {
        const promptedTask = makeMinimalMCPTask({
            description: "Description",
            prompt: { text: "Custom prompt text" },
        });
        const { prompts } = compileMCPTask(promptedTask);
        assert.equal(prompts[0].raw, "Custom prompt text");
    });

    it("includes task vars in test case", () => {
        const taskWithVars = makeMinimalMCPTask({
            prompt: {
                vars: { task: "Do the thing", extra: "value" },
            },
        });
        const { tests } = compileMCPTask(taskWithVars);
        assert.equal(tests[0].vars.task, "Do the thing");
        assert.equal(tests[0].vars.extra, "value");
    });

    it("creates multi-turn test case when multiTurn is defined", () => {
        const conversation = [
            { role: "user", content: "Hello" },
            { role: "assistant", content: "Hi" },
        ];
        const chattyTask = makeMinimalMCPTask({
            multiTurn: { turns: conversation },
        });
        const { tests } = compileMCPTask(chattyTask);
        assert.equal(tests.length, 2);
        assert.ok(tests[1].description.includes("[multi-turn]"));
    });

    it("warns when serverConfig is missing", () => {
        const compiled = compileMCPTask(makeMinimalMCPTask());
        assert.ok(compiled.warnings.some((note) => note.includes("no serverConfig")));
    });

    it("sets grader provider on LLM assertions", () => {
        const rubricTask = makeMinimalMCPTask({
            assertions: [
                {
                    type: "llm-rubric",
                    value: "Check quality",
                },
            ],
        });
        const { tests } = compileMCPTask(rubricTask, { graderProvider: "openai:chat:gpt-5" });
        const rubricAssertion = tests[0].assert?.find((candidate) => candidate.type === "llm-rubric");
        assert.ok(rubricAssertion);
        assert.equal(rubricAssertion.provider, "openai:chat:gpt-5");
    });
});
281
+ // ---------------------------------------------------------------------------
282
+ // buildMCPAssertions
283
+ // ---------------------------------------------------------------------------
284
// Assertion-mapping suite: feeds assertion specs to buildMCPAssertions and
// checks the emitted assertion types, values, weights and warnings.
describe("buildMCPAssertions", () => {
    // Single context object shared by every case in this suite.
    const ctx = { taskId: "test", capabilities: [], graderProvider: undefined };

    // Runs buildMCPAssertions over the given specs with the shared context.
    const build = (...specs) => buildMCPAssertions(specs, ctx);

    it("maps tool-called to javascript assertion", () => {
        const built = build({ type: "tool-called", value: "getDocument" });
        assert.equal(built.assertions.length, 1);
        const emitted = built.assertions[0];
        assert.equal(emitted.type, "javascript");
        assert.ok(emitted.value.includes("getDocument"), "Should reference tool name");
    });

    it("maps tool-input-matches to javascript assertion", () => {
        const built = build({ type: "tool-input-matches", value: { id: "doc-123" } });
        assert.equal(built.assertions.length, 1);
        const emitted = built.assertions[0];
        assert.equal(emitted.type, "javascript");
        assert.ok(emitted.value.includes("doc-123"));
    });

    it("maps tool-output-matches to javascript assertion", () => {
        const built = build({ type: "tool-output-matches", value: { title: "Hello" } });
        assert.equal(built.assertions.length, 1);
        const emitted = built.assertions[0];
        assert.equal(emitted.type, "javascript");
        assert.ok(emitted.value.includes("Hello"));
    });

    it("maps error-returned to javascript assertion", () => {
        const built = build({ type: "error-returned", value: { code: -32602 } });
        assert.equal(built.assertions.length, 1);
        const emitted = built.assertions[0];
        assert.equal(emitted.type, "javascript");
        assert.ok(emitted.value.includes("-32602"));
    });

    it("maps capability-available to javascript assertion", () => {
        const built = build({ type: "capability-available", value: "tools/list" });
        assert.equal(built.assertions.length, 1);
        const emitted = built.assertions[0];
        assert.equal(emitted.type, "javascript");
        assert.ok(emitted.value.includes("tools/list"));
    });

    it("passes through standard assertion types", () => {
        // Types that are not MCP-specific keep their original type.
        const built = build({ type: "contains", value: "result" }, { type: "is-json" });
        assert.equal(built.assertions.length, 2);
        assert.equal(built.assertions[0].type, "contains");
        assert.equal(built.assertions[1].type, "is-json");
    });

    it("preserves assertion weights", () => {
        const built = build({ type: "tool-called", value: "test", weight: 0.5 });
        assert.equal(built.assertions[0].weight, 0.5);
    });

    it("warns on unknown assertion type", () => {
        const built = build({ type: "unknown-type", value: "x" });
        assert.ok(built.warnings.some((note) => note.includes("unknown")));
    });
});
331
+ // ---------------------------------------------------------------------------
332
+ // Example task compilation (end-to-end)
333
+ // ---------------------------------------------------------------------------
334
// End-to-end suite: compiles the bundled example MCP tasks with the test
// model list and checks the shape of the compiled output.
describe("example MCP tasks — end-to-end compilation", () => {
    // One options object reused by every compilation in this suite.
    const sharedOptions = { models: TEST_MODELS };

    it("compiles all example tasks without errors", () => {
        for (const example of allMCPExampleTasks) {
            const compiled = compileMCPTask(example, sharedOptions);
            assert.ok(compiled.providers.length > 0, `${example.id}: should produce providers`);
            assert.ok(compiled.tests.length > 0, `${example.id}: should produce test cases`);
            assert.ok(compiled.prompts.length > 0, `${example.id}: should produce prompts`);
        }
    });

    it("query task has tool-called + contains + llm-rubric assertions", () => {
        const compiled = compileMCPTask(queryDocumentsTask, sharedOptions);
        const emitted = compiled.tests[0].assert;
        // Expected assertion types, in emission order.
        const expectedTypes = ["javascript", "contains", "contains", "llm-rubric"];
        assert.equal(emitted.length, 4);
        expectedTypes.forEach((expectedType, position) => {
            assert.equal(emitted[position].type, expectedType);
        });
    });

    it("schema task uses get_schema tool", () => {
        const compiled = compileMCPTask(inspectSchemaTask, sharedOptions);
        const emitted = compiled.tests[0].assert;
        const referencesGetSchema = emitted.some(
            (candidate) => candidate.type === "javascript" && candidate.value.includes("get_schema"),
        );
        assert.ok(referencesGetSchema, "Should have tool-called assertion for get_schema");
    });

    it("create-publish task produces multi-turn test case", () => {
        const { tests } = compileMCPTask(createAndPublishTask, sharedOptions);
        assert.equal(tests.length, 2);
        assert.ok(tests[1].description?.includes("[multi-turn]"));
    });

    it("stdio task uses custom provider with command config", () => {
        const { providers } = compileMCPTask(stdioServerTask, sharedOptions);
        const leadProvider = providers[0];
        assert.equal(leadProvider.id, MCP_PROVIDER_PATH);
        assert.equal(cfg(leadProvider).model, "anthropic:messages:claude-opus-4-6");
        const mcpServer = serverCfg(leadProvider);
        assert.equal(mcpServer.command, "node dist/sanity-mcp-server.js");
    });

    it("semantic search task has two tool-called + one llm-rubric assertion", () => {
        const compiled = compileMCPTask(semanticSearchTask, sharedOptions);
        const emitted = compiled.tests[0].assert;
        assert.equal(emitted.length, 3);
        assert.equal(emitted[0].type, "javascript");
        assert.ok(emitted[0].value.includes("list_embeddings_indices"));
        assert.equal(emitted[1].type, "javascript");
        assert.ok(emitted[1].value.includes("semantic_search"));
        assert.equal(emitted[2].type, "llm-rubric");
    });

    it("remote task has bearer auth and tools filter", () => {
        const { providers } = compileMCPTask(queryDocumentsTask, sharedOptions);
        const leadProvider = providers[0];
        const expectedAuth = {
            type: "bearer",
            token: "{{env.SANITY_MCP_AUTH_TOKEN}}",
        };
        assert.deepEqual(serverCfg(leadProvider).auth, expectedAuth);
        assert.deepEqual(cfg(leadProvider).mcpTools, [
            "query_documents",
            "get_schema",
        ]);
    });
});
@@ -0,0 +1,9 @@
1
+ /**
2
+ * promptfoo-compiler.test.ts — Unit tests for the PromptfooCompiler.
3
+ *
4
+ * Tests compilation of a TaskGraph into Promptfoo configuration,
5
+ * including provider assembly, prompt generation, and test case creation.
6
+ *
7
+ * Run: npx tsx --test src/pipeline/compiler/__tests__/promptfoo-compiler.test.ts
8
+ */
9
+ export {};