@sanity/ailf 0.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377) hide show
  1. package/README.md +0 -1
  2. package/config/features.ts +23 -0
  3. package/config/models.ts +95 -0
  4. package/config/prompts.ts +16 -0
  5. package/config/rubrics.ts +225 -0
  6. package/config/schedules.ts +47 -0
  7. package/config/sinks.ts +37 -0
  8. package/config/sources.ts +21 -0
  9. package/config/thresholds.ts +61 -0
  10. package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
  11. package/dist/_vendor/ailf-core/config-helpers.js +170 -0
  12. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  13. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  14. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  15. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  16. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  17. package/dist/_vendor/ailf-core/index.js +5 -0
  18. package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
  19. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  20. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  21. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  22. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  23. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  24. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  25. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
  26. package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
  27. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
  28. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
  29. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
  30. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
  31. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  32. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  33. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  34. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  35. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  36. package/dist/_vendor/ailf-core/services/index.js +2 -1
  37. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  38. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  39. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  40. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  41. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  42. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  43. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  44. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  47. package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
  48. package/dist/_vendor/ailf-core/types/index.js +8 -1
  49. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
  50. package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
  51. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  52. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  53. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  54. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  55. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  56. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  57. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  58. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  59. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  60. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  61. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  62. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  63. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  64. package/dist/_vendor/ailf-shared/index.js +0 -1
  65. package/dist/adapters/api-client/build-request.js +14 -13
  66. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  67. package/dist/adapters/config-sources/file-config-adapter.js +39 -12
  68. package/dist/adapters/config-sources/index.d.ts +2 -0
  69. package/dist/adapters/config-sources/index.js +1 -0
  70. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  71. package/dist/adapters/config-sources/ts-config-loader.js +141 -0
  72. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  73. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  74. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  75. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  76. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  77. package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
  78. package/dist/adapters/task-sources/index.d.ts +3 -2
  79. package/dist/adapters/task-sources/index.js +3 -2
  80. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  81. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  82. package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
  83. package/dist/adapters/task-sources/repo-task-source.js +92 -80
  84. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  85. package/dist/adapters/task-sources/repo-validation.js +126 -5
  86. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  87. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  88. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  89. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  90. package/dist/cli.js +0 -2
  91. package/dist/commands/baseline.js +4 -1
  92. package/dist/commands/calculate-scores.js +1 -1
  93. package/dist/commands/coverage-audit.js +9 -1
  94. package/dist/commands/explain-handler.js +25 -23
  95. package/dist/commands/fetch-docs.js +3 -2
  96. package/dist/commands/generate-configs.js +1 -1
  97. package/dist/commands/init.d.ts +6 -4
  98. package/dist/commands/init.js +302 -23
  99. package/dist/commands/interactive.js +11 -7
  100. package/dist/commands/pipeline-action.d.ts +2 -0
  101. package/dist/commands/pipeline-action.js +16 -6
  102. package/dist/commands/pipeline.d.ts +1 -0
  103. package/dist/commands/pipeline.js +4 -2
  104. package/dist/commands/pr-comment.js +1 -1
  105. package/dist/commands/publish.js +2 -2
  106. package/dist/commands/readiness-report.js +13 -6
  107. package/dist/commands/validate-tasks.d.ts +2 -2
  108. package/dist/commands/validate-tasks.js +26 -15
  109. package/dist/composition-root.d.ts +13 -1
  110. package/dist/composition-root.js +99 -4
  111. package/dist/index.d.ts +41 -0
  112. package/dist/index.js +48 -0
  113. package/dist/orchestration/build-app-context.js +1 -0
  114. package/dist/orchestration/build-step-sequence.js +28 -8
  115. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  116. package/dist/orchestration/steps/fetch-docs-step.js +8 -7
  117. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  118. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  119. package/dist/orchestration/steps/generate-configs-step.js +261 -51
  120. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  121. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  122. package/dist/orchestration/steps/readiness-step.js +5 -6
  123. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  124. package/dist/orchestration/steps/run-eval-step.js +8 -7
  125. package/dist/pipeline/cache.d.ts +1 -1
  126. package/dist/pipeline/cache.js +36 -8
  127. package/dist/pipeline/calculate-scores.d.ts +2 -4
  128. package/dist/pipeline/calculate-scores.js +43 -113
  129. package/dist/pipeline/checks.js +2 -2
  130. package/dist/pipeline/compare.js +8 -8
  131. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  132. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  133. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  134. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  135. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  136. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  137. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  138. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  139. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  140. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
  141. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  142. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  143. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  144. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  145. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  146. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
  147. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  148. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  149. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  150. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  151. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  152. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  153. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  154. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  155. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  156. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  157. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  158. package/dist/pipeline/compiler/config-loader.js +111 -0
  159. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  160. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  161. package/dist/pipeline/compiler/hash.d.ts +11 -0
  162. package/dist/pipeline/compiler/hash.js +18 -0
  163. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  164. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  165. package/dist/pipeline/compiler/index.d.ts +29 -0
  166. package/dist/pipeline/compiler/index.js +45 -0
  167. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  168. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  169. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  170. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  171. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  172. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  173. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  174. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  175. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  176. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  177. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  178. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  179. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  180. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  181. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  182. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  183. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  184. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  185. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  186. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  187. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  188. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  189. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  190. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  191. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  192. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  193. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  194. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  195. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  196. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  197. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  198. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  199. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  200. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  201. package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
  202. package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
  203. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  204. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  205. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  206. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  207. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  208. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  209. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  210. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  211. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  212. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  213. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  214. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  215. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  216. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  217. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  218. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  219. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  220. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  221. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  222. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  223. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  224. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  225. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  226. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  227. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  228. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  229. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  230. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  231. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  232. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  233. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  234. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  235. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  241. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  242. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  251. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  252. package/dist/pipeline/compiler/preset-loader.js +99 -0
  253. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  254. package/dist/pipeline/compiler/presets/index.js +8 -0
  255. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
  256. package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
  257. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  258. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  259. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  260. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  261. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  262. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  263. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  264. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  265. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  266. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  267. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  268. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  269. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  270. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  271. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  272. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  273. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  274. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  275. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  276. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  277. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  278. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  279. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  280. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  281. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  282. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  283. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  284. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  285. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  286. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  287. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  288. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  289. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  290. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  291. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  292. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  293. package/dist/pipeline/coverage-audit.d.ts +15 -5
  294. package/dist/pipeline/coverage-audit.js +41 -22
  295. package/dist/pipeline/eval-constants.d.ts +16 -6
  296. package/dist/pipeline/eval-constants.js +25 -4
  297. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  298. package/dist/pipeline/eval-fingerprint.js +8 -9
  299. package/dist/pipeline/expand-tasks.d.ts +19 -10
  300. package/dist/pipeline/expand-tasks.js +34 -28
  301. package/dist/pipeline/gap-analysis.d.ts +1 -1
  302. package/dist/pipeline/gap-analysis.js +2 -2
  303. package/dist/pipeline/generate-configs.d.ts +22 -4
  304. package/dist/pipeline/generate-configs.js +53 -24
  305. package/dist/pipeline/grader-api.d.ts +3 -3
  306. package/dist/pipeline/grader-api.js +5 -12
  307. package/dist/pipeline/grader-compare-runner.js +20 -27
  308. package/dist/pipeline/grader-comparison.d.ts +4 -8
  309. package/dist/pipeline/grader-comparison.js +11 -17
  310. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  311. package/dist/pipeline/grader-consistency-runner.js +16 -20
  312. package/dist/pipeline/grader-consistency.d.ts +6 -10
  313. package/dist/pipeline/grader-consistency.js +13 -32
  314. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  315. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  316. package/dist/pipeline/grader-sensitivity.js +10 -10
  317. package/dist/pipeline/grader-validate-runner.js +7 -5
  318. package/dist/pipeline/grader-validation.d.ts +2 -6
  319. package/dist/pipeline/grader-validation.js +14 -22
  320. package/dist/pipeline/map-request-to-config.js +7 -1
  321. package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
  322. package/dist/pipeline/mirror-repo-tasks.js +22 -21
  323. package/dist/pipeline/normalize-mode.d.ts +49 -0
  324. package/dist/pipeline/normalize-mode.js +64 -0
  325. package/dist/pipeline/plan.d.ts +5 -2
  326. package/dist/pipeline/plan.js +134 -78
  327. package/dist/pipeline/pr-comment.js +2 -0
  328. package/dist/pipeline/profile-resolution.d.ts +22 -14
  329. package/dist/pipeline/profile-resolution.js +41 -19
  330. package/dist/pipeline/provenance.d.ts +2 -2
  331. package/dist/pipeline/provenance.js +12 -17
  332. package/dist/pipeline/release-report.js +4 -4
  333. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  334. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  335. package/dist/pipeline/rubric-loader.d.ts +20 -0
  336. package/dist/pipeline/rubric-loader.js +37 -0
  337. package/dist/pipeline/validate.d.ts +4 -4
  338. package/dist/pipeline/validate.js +64 -53
  339. package/dist/schedules/loader.js +18 -8
  340. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  341. package/dist/scripts/migrate-task-mode.js +85 -0
  342. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  343. package/dist/scripts/validate-task-sources.d.ts +1 -1
  344. package/dist/scripts/validate-task-sources.js +15 -15
  345. package/dist/sinks/loader.js +5 -7
  346. package/dist/sources.d.ts +7 -7
  347. package/dist/sources.js +22 -24
  348. package/dist/webhook/dispatch.js +2 -1
  349. package/package.json +15 -4
  350. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  351. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  352. package/tasks/literacy/frameworks.task.ts +128 -0
  353. package/tasks/literacy/functions.task.ts +69 -0
  354. package/tasks/literacy/groq.task.ts +258 -0
  355. package/tasks/literacy/nextjs-live.task.ts +75 -0
  356. package/tasks/literacy/studio-setup.task.ts +131 -0
  357. package/tasks/literacy/visual-editing.task.ts +146 -0
  358. package/config/features.yaml +0 -116
  359. package/config/models.yaml +0 -116
  360. package/config/prompts.yaml +0 -75
  361. package/config/rubrics.yaml +0 -81
  362. package/config/schedules.yaml +0 -43
  363. package/config/sinks.yaml +0 -54
  364. package/config/sources.yaml +0 -51
  365. package/config/thresholds.yaml +0 -49
  366. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  367. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  368. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  369. package/dist/_vendor/ailf-tasks/index.js +0 -16
  370. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  371. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  372. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  373. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  374. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  375. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  376. package/dist/agent-observer/test-imports.d.ts +0 -7
  377. package/dist/agent-observer/test-imports.js +0 -185
@@ -0,0 +1,104 @@
1
+ /**
2
+ * MCP server provider assembly — builds Promptfoo provider configs.
3
+ */
4
+ // ---------------------------------------------------------------------------
5
+ // Constants
6
+ // ---------------------------------------------------------------------------
7
+ /** Default max tool rounds for MCP multi-turn execution; buildMCPProvider falls back to it when task.maxToolRounds is null or undefined. */
8
+ export const DEFAULT_MAX_TOOL_ROUNDS = 5;
9
+ /** Provider path relative to eval package dist; buildMCPProvider uses this `file://` path as the `id` of every provider entry it builds. */
10
+ export const MCP_PROVIDER_PATH = "file://dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js";
11
+ // ---------------------------------------------------------------------------
12
+ // Provider assembly
13
+ // ---------------------------------------------------------------------------
14
/**
 * Assemble the Promptfoo provider entries for an MCP task — one entry per model.
 *
 * Every entry points at the custom MCP tool provider (`MCP_PROVIDER_PATH`) and
 * carries a config of the form
 *   { model, mcpServer, mcpTools?, maxToolRounds, ...modelConfig }
 * where `modelConfig` is spread last, so its keys win over the ones listed
 * before it.
 *
 * Model selection, in priority order:
 *   1. `task.models`, when it is a non-empty list (ids double as labels);
 *   2. the `models` argument (registry entries with `id`, `label`, `config`);
 *   3. a single hard-coded Claude Sonnet 4 fallback, with a warning.
 *
 * @param task - MCP task definition; reads `id`, `capabilities`,
 *   `maxToolRounds`, `models` and (via buildMCPServerConfig) `serverConfig`.
 * @param models - Registry models to use when the task names none.
 * @param warnings - Mutable list that receives human-readable warnings.
 * @returns Array of `{ id, label, config }` provider entries.
 */
export function buildMCPProvider(task, models, warnings) {
    // Resolve the server connection first so its warnings precede any
    // "no models" warning emitted below.
    const serverConnection = buildMCPServerConfig(task, warnings);
    const allowedTools = task.capabilities ?? undefined;
    const roundLimit = task.maxToolRounds ?? DEFAULT_MAX_TOOL_ROUNDS;
    // Only emit the mcpTools key when the task actually declares capabilities.
    const toolFilter = allowedTools ? { mcpTools: allowedTools } : {};

    // Shape one provider entry; key order inside `config` is significant
    // because later spreads override earlier keys.
    const toProvider = (modelId, displayName, extraConfig) => ({
        id: MCP_PROVIDER_PATH,
        label: `${displayName} + MCP`,
        config: {
            model: modelId,
            mcpServer: serverConnection,
            ...toolFilter,
            maxToolRounds: roundLimit,
            ...(extraConfig ?? {}),
        },
    });

    // A task-level model list overrides whatever the registry supplied.
    const overrides = task.models;
    if (overrides?.length) {
        return overrides.map((modelId) => toProvider(modelId, modelId, undefined));
    }

    // Registry models (already filtered to mcp-server mode by the caller).
    if (models.length !== 0) {
        return models.map(({ id, label, config }) => toProvider(id, label, config));
    }

    // Nothing configured anywhere: warn and fall back to a single default model.
    warnings.push(`MCP task "${task.id}": no models available. Add "mcp-server" to a ` +
        "model's modes array in config/models.ts, or set models on the task.");
    return [
        toProvider("anthropic:messages:claude-sonnet-4-20250514", "Claude Sonnet 4", undefined),
    ];
}
59
/**
 * Build the MCP server connection config handed to the custom tool provider.
 *
 * Result shape: `{ name, command? | url?, auth? }`
 *   - `name` is always the task id.
 *   - `command` is copied from `serverConfig.command` for the "stdio"
 *     transport; every other transport copies `serverConfig.url` instead.
 *   - `auth` is `serverConfig.auth` when set. Otherwise, if `serverConfig.env`
 *     contains a key matching /token|auth|key/i, a bearer auth is derived whose
 *     token is the template string `{{env.NAME}}`. NAME is the env value
 *     itself, or the inside of a `$env(NAME)` wrapper.
 *
 * A derived NAME that is not a valid identifier — including a non-string env
 * value — produces a warning and no auth, rather than throwing.
 *
 * @param task - MCP task definition; reads `id` and `serverConfig`.
 * @param warnings - Mutable list that receives human-readable warnings.
 * @returns The connection config; `{ name }` only when the task has no
 *   `serverConfig`.
 */
export function buildMCPServerConfig(task, warnings) {
    const config = task.serverConfig;
    if (!config) {
        warnings.push(`MCP task "${task.id}": no serverConfig — using placeholder. ` +
            "Set serverConfig.command or serverConfig.url to point to your MCP server.");
        return { name: task.id };
    }

    // stdio servers are launched via a command; everything else is reached by URL.
    const serverConfig = {
        name: task.id,
        ...(config.transport === "stdio"
            ? { command: config.command }
            : { url: config.url }),
    };

    // Explicit auth always wins over anything inferred from env.
    if (config.auth) {
        return { ...serverConfig, auth: config.auth };
    }
    if (!config.env) {
        return serverConfig;
    }

    // Infer a bearer token from the first env entry whose key looks like a credential.
    const tokenKey = Object.keys(config.env).find((k) => /token|auth|key/i.test(k));
    if (!tokenKey) {
        return serverConfig;
    }

    const rawValue = config.env[tokenKey];
    // Non-string values (numbers, null, ...) cannot name an env var; treat them
    // as empty so they fall into the warning path instead of throwing.
    const text = typeof rawValue === "string" ? rawValue : "";
    const envVar = text.startsWith("$env(") && text.endsWith(")")
        ? text.slice(5, -1)
        : text;

    if (!envVar || !/^[A-Za-z_][A-Za-z0-9_]*$/.test(envVar)) {
        // Include the task id so the warning can be traced, like the other warnings here.
        warnings.push(`MCP task "${task.id}": env var name "${envVar}" from "${String(rawValue)}" is not a valid ` +
            "identifier — skipping auth config");
        return serverConfig;
    }

    return {
        ...serverConfig,
        auth: {
            type: "bearer",
            token: `{{env.${envVar}}}`,
        },
    };
}
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Shared types for the MCP server mode handler.
3
+ */
4
+ import type { ModeProviderEntry } from "../../../../_vendor/ailf-core/index.d.ts";
5
+ import type { PromptfooPrompt, PromptfooProvider, PromptfooTestCase } from "../../promptfoo-compiler.js";
6
+ /** Options for compiling an MCP server task; every field is optional. */
7
+ export interface MCPCompileOptions {
8
+ /** Grader provider for LLM-graded assertions (a provider identifier string). */
9
+ graderProvider?: string;
10
+ /** Model providers to evaluate with (from registry, filtered by mcp-server mode) */
11
+ models?: ModeProviderEntry[];
12
+ }
13
+ /** Result of compiling a single MCP task: providers, tests, prompts, plus any warnings raised. */
14
+ export interface MCPCompileResult {
15
+ /** Promptfoo provider configs for the MCP server (an array, so it may hold several entries). */
16
+ providers: PromptfooProvider[];
17
+ /** Compiled Promptfoo test cases */
18
+ tests: PromptfooTestCase[];
19
+ /** Prompts for MCP evaluation */
20
+ prompts: PromptfooPrompt[];
21
+ /** Human-readable warnings generated during compilation */
22
+ warnings: string[];
23
+ }
24
+ /** A single validation problem found in an MCP task definition. */
25
+ export interface MCPValidationError {
26
+ field: string; // Name or dotted path of the offending field, e.g. "serverConfig.command"
27
+ message: string; // Human-readable description of what is missing or wrong
28
+ }
29
+ /** Context passed along when building MCP assertions for one task. */
30
+ export interface MCPAssertionContext {
31
+ /** ID of the task being compiled (for error messages) */
32
+ taskId: string;
33
+ /** Expected server capabilities (required; a list of strings) */
34
+ capabilities: string[];
35
+ /** Optional grader provider for LLM-graded assertions */
36
+ graderProvider?: string;
37
+ }
@@ -0,0 +1,4 @@
1
+ /**
2
+ * Shared types for the MCP server mode handler.
3
+ */
4
+ export {};
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Validation for MCP server task definitions.
3
+ */
4
+ import type { MCPServerTaskDefinition } from "../../../../_vendor/ailf-core/index.d.ts";
5
+ import type { MCPValidationError } from "./types.js";
6
+ /**
7
+ * Validate that an MCP task definition has all required fields; returns one MCPValidationError per problem found.
8
+ */
9
+ export declare function validateMCPTask(task: MCPServerTaskDefinition): MCPValidationError[];
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Validation for MCP server task definitions.
3
+ */
4
/**
 * Check an MCP task definition for missing required fields.
 *
 * Rules applied:
 * - `id` and `title` must be truthy.
 * - When `serverConfig` is present, the "stdio" transport needs `command`
 *   and the "sse" / "streamable-http" transports need `url`.
 * - Every "tool-called" assertion needs a truthy `value`.
 *
 * @param task - Task definition to inspect.
 * @returns A list of `{ field, message }` entries; empty when nothing is wrong.
 */
export function validateMCPTask(task) {
    const problems = [];
    const report = (field, message) => {
        problems.push({ field, message });
    };
    if (!task.id) {
        report("id", "Task ID is required");
    }
    if (!task.title) {
        report("title", "Task title is required");
    }
    if (task.serverConfig) {
        const { transport: kind, command: launchCommand, url: endpoint } = task.serverConfig;
        const isStdio = kind === "stdio";
        const isHttpLike = kind === "sse" || kind === "streamable-http";
        if (isStdio && !launchCommand) {
            report("serverConfig.command", "Server command is required for stdio transport (e.g., 'node dist/server.js')");
        }
        if (isHttpLike && !endpoint) {
            report("serverConfig.url", `Server URL is required for ${kind} transport`);
        }
    }
    // A tool-called assertion must say which tool it expects.
    if (task.assertions) {
        for (const entry of task.assertions) {
            if (entry.type !== "tool-called") {
                continue;
            }
            if (!("value" in entry) || !entry.value) {
                report("assertions", 'tool-called assertion requires a "value" specifying the tool name');
            }
        }
    }
    return problems;
}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * MCPToolProvider — Custom Promptfoo provider for MCP tool-use evaluation.
3
+ *
4
+ * Orchestrates the MCP evaluation flow:
5
+ * 1. Connects to the MCP server and discovers available tools
6
+ * 2. Selects the appropriate LLM backend based on model ID prefix
7
+ * 3. Delegates the multi-turn tool loop to the backend
8
+ * 4. Formats the result for Promptfoo (including tool call summary)
9
+ *
10
+ * Promptfoo config usage:
11
+ *
12
+ * providers:
13
+ * - id: file://dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js
14
+ * label: "Claude Opus 4.6 + MCP"
15
+ * config:
16
+ * model: anthropic:messages:claude-opus-4-6
17
+ * maxToolRounds: 5
18
+ * temperature: 0.2
19
+ * max_tokens: 4096
20
+ * mcpServer:
21
+ * url: https://mcp.sanity.io
22
+ * auth: { type: bearer, token: "{{env.SANITY_API_TOKEN}}" }
23
+ * name: mcp-live-query-documents
24
+ * mcpTools: [query_documents, get_schema]
25
+ */
26
+ import type { CallApiContextParams, ProviderOptions, ProviderResponse } from "./types.js";
27
/**
 * Type declaration for the custom Promptfoo provider used in MCP tool-use
 * evaluation.
 */
export default class MCPToolProvider {
    /** Provider configuration object. */
    config: Record<string, unknown>;
    private providerId;
    /**
     * @param options - Optional provider options.
     */
    constructor(options?: ProviderOptions);
    /** Returns the provider identifier. */
    id(): string;
    /**
     * Evaluate a single prompt.
     *
     * @param prompt - Prompt text to evaluate.
     * @param _context - Optional call context.
     * @returns A promise for the provider response.
     */
    callApi(
        prompt: string,
        _context?: CallApiContextParams,
    ): Promise<ProviderResponse>;
}
@@ -0,0 +1,174 @@
1
+ /**
2
+ * MCPToolProvider — Custom Promptfoo provider for MCP tool-use evaluation.
3
+ *
4
+ * Orchestrates the MCP evaluation flow:
5
+ * 1. Connects to the MCP server and discovers available tools
6
+ * 2. Selects the appropriate LLM backend based on model ID prefix
7
+ * 3. Delegates the multi-turn tool loop to the backend
8
+ * 4. Formats the result for Promptfoo (including tool call summary)
9
+ *
10
+ * Promptfoo config usage:
11
+ *
12
+ * providers:
13
+ * - id: file://dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js
14
+ * label: "Claude Opus 4.6 + MCP"
15
+ * config:
16
+ * model: anthropic:messages:claude-opus-4-6
17
+ * maxToolRounds: 5
18
+ * temperature: 0.2
19
+ * max_tokens: 4096
20
+ * mcpServer:
21
+ * url: https://mcp.sanity.io
22
+ * auth: { type: bearer, token: "{{env.SANITY_API_TOKEN}}" }
23
+ * name: mcp-live-query-documents
24
+ * mcpTools: [query_documents, get_schema]
25
+ */
26
import { fileURLToPath } from "node:url";
import { config as loadDotenv } from "dotenv";
import { connectMCP } from "./mcp-connection.js";
import { runAnthropicToolLoop } from "./tool-loop-anthropic.js";
import { runOpenAIToolLoop } from "./tool-loop-openai.js";
30
// Load environment variables from the `.env` file located five directories
// above this module. `override: true` is passed through to dotenv unchanged.
//
// The file URL is converted with fileURLToPath rather than read through
// `URL#pathname`: `pathname` keeps percent-escapes (e.g. "%20" for a space)
// and yields "/C:/..." on Windows, neither of which is a usable filesystem path.
loadDotenv({
    override: true,
    path: fileURLToPath(new URL("../../../../../.env", import.meta.url)),
});
34
+ // ---------------------------------------------------------------------------
35
+ // Backend registry — maps model ID prefixes to tool loop implementations
36
+ // ---------------------------------------------------------------------------
37
// Tool-loop implementation keyed by model ID prefix (the segment before the
// first ":"). Key order is significant: it is the order in which supported
// prefixes are listed in the "No backend" error message.
const BACKENDS = {
    anthropic: runAnthropicToolLoop,
    openai: runOpenAIToolLoop,
};
41
/**
 * Resolve the LLM backend and API model name from a model ID.
 *
 * Model IDs follow the pattern `provider:type:model-name` (e.g.,
 * `anthropic:messages:claude-opus-4-6`). The first segment selects the
 * backend from BACKENDS. The model name is everything after the second
 * segment; IDs with fewer than three segments use their last segment.
 *
 * @param modelId - Full model ID string.
 * @returns `{ backend, modelName }` — the tool-loop function and the model
 *   name to pass to it.
 * @throws Error when the first segment is not a registered backend prefix.
 */
function resolveBackend(modelId) {
    const segments = modelId.split(":");
    const prefix = segments[0];
    // Own-property check: a bare `BACKENDS[prefix]` lookup would also match
    // members inherited from Object.prototype (e.g. "constructor",
    // "toString") and hand back a function that is not a backend.
    const isRegistered = Object.prototype.hasOwnProperty.call(BACKENDS, prefix);
    const backend = isRegistered ? BACKENDS[prefix] : undefined;
    if (!backend) {
        const supported = Object.keys(BACKENDS).join(", ");
        throw new Error(`No backend for model "${modelId}". Supported prefixes: ${supported}`);
    }
    // Extract the model name for the API (e.g., "claude-opus-4-6" from
    // "anthropic:messages:claude-opus-4-6"); names containing ":" are kept whole.
    const modelName = segments.length > 2
        ? segments.slice(2).join(":")
        : segments[segments.length - 1];
    return { backend, modelName };
}
60
+ // ---------------------------------------------------------------------------
61
+ // Helpers
62
+ // ---------------------------------------------------------------------------
63
/**
 * Append a machine-readable summary of the tools that were called, so
 * assertions can detect them in the output text.
 *
 * @param text - Output text to extend.
 * @param log - Tool call log; each entry's `name` is listed in the summary.
 * @returns `text` unchanged when the log is empty, otherwise `text` followed
 *   by a blank line and an `MCP_TOOLS_CALLED` HTML comment holding the JSON
 *   array of tool names.
 */
function appendToolSummary(text, log) {
    if (log.length > 0) {
        const calledNames = log.map((entry) => entry.name);
        const marker = `<!-- MCP_TOOLS_CALLED: ${JSON.stringify(calledNames)} -->`;
        return `${text}\n\n${marker}`;
    }
    return text;
}
70
/**
 * Resolve the API key for a given model prefix.
 *
 * A truthy `config.apiKey` wins and is returned as a string. Otherwise the
 * key is read from the environment: ANTHROPIC_API_KEY for "anthropic",
 * OPENAI_API_KEY for "openai".
 *
 * @param prefix - Model ID prefix.
 * @param config - Provider config; only `apiKey` is read.
 * @returns The API key, or `undefined` when none is configured or the prefix
 *   has no associated environment variable.
 */
function resolveApiKey(prefix, config) {
    const explicitKey = config.apiKey;
    if (explicitKey) {
        return String(explicitKey);
    }
    switch (prefix) {
        case "anthropic":
            return process.env["ANTHROPIC_API_KEY"];
        case "openai":
            return process.env["OPENAI_API_KEY"];
        default:
            return undefined;
    }
}
81
+ // ---------------------------------------------------------------------------
82
+ // Provider class
83
+ // ---------------------------------------------------------------------------
84
/**
 * Custom Promptfoo provider that evaluates a prompt against an LLM with MCP
 * tools attached.
 */
export default class MCPToolProvider {
    config;
    providerId;
    /**
     * @param options - Provider options; `config` defaults to `{}` and `id`
     *   defaults to "mcp-tool-provider".
     */
    constructor(options = {}) {
        const { config, id } = options;
        this.config = config || {};
        this.providerId = id || "mcp-tool-provider";
    }
    /** Returns the provider ID chosen at construction time. */
    id() {
        return this.providerId;
    }
    /**
     * Run one evaluation: resolve the backend and API key, connect to the MCP
     * server, filter its tools, run the backend's tool loop, and format the
     * result.
     *
     * Configuration, backend-resolution, API-key, connection and "no tools"
     * problems are reported as `{ error, output: undefined }`; a failure
     * inside the tool loop itself propagates as a rejection. The MCP client
     * is always cleaned up once it has been connected.
     *
     * @param prompt - Prompt text passed to the backend.
     * @param _context - Unused.
     * @returns The provider response object.
     */
    async callApi(prompt, _context) {
        const settings = this.config;
        const fail = (message) => ({ error: message, output: undefined });
        const describe = (err) => (err instanceof Error ? err.message : String(err));
        const serverSettings = settings.mcpServer;
        if (!serverSettings) {
            return fail("mcpServer config is required");
        }
        // Pick the tool loop and API model name from the model ID.
        const modelId = settings.model || "anthropic:messages:claude-opus-4-6";
        let runToolLoop;
        let modelName;
        try {
            ({ backend: runToolLoop, modelName } = resolveBackend(modelId));
        }
        catch (err) {
            return fail(describe(err));
        }
        // Look up credentials for the model's provider prefix.
        const prefix = modelId.split(":")[0];
        const apiKey = resolveApiKey(prefix, settings);
        if (!apiKey) {
            return fail(`API key not found for ${prefix}. Set ${prefix.toUpperCase()}_API_KEY in env or config.apiKey.`);
        }
        // Open the MCP session.
        let session;
        try {
            session = await connectMCP(serverSettings);
        }
        catch (err) {
            return fail(`Failed to connect to MCP server: ${describe(err)}`);
        }
        try {
            // Keep only the tools named in config.mcpTools, when that list is set.
            const discovered = session.getAllTools();
            const allowList = settings.mcpTools;
            const tools = allowList
                ? discovered.filter((tool) => allowList.includes(tool.name))
                : discovered;
            if (tools.length === 0) {
                return fail("No MCP tools available after filtering. Check mcpTools config and server capabilities.");
            }
            const loopResult = await runToolLoop({
                prompt,
                tools,
                callTool: session.callTool,
                maxToolRounds: settings.maxToolRounds || 5,
                model: modelName,
                temperature: settings.temperature ?? 0.2,
                maxTokens: settings.max_tokens || 4096,
                apiKey,
            });
            const metadata = {
                toolRounds: loopResult.toolRounds,
                toolCallLog: loopResult.toolCallLog,
                exhaustedRounds: loopResult.exhaustedRounds,
                latencyMs: loopResult.latencyMs,
            };
            return {
                cost: 0,
                metadata,
                output: appendToolSummary(loopResult.output, loopResult.toolCallLog),
                tokenUsage: loopResult.tokenUsage,
            };
        }
        finally {
            // Best-effort cleanup: a rejected cleanup must not mask the result.
            await session.cleanup().catch(() => { });
        }
    }
}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * MCP server connection and tool discovery.
3
+ *
4
+ * Handles connecting to an MCP server via streamable-http or stdio transport,
5
+ * discovering available tools, and resolving {{env.VAR}} templates in config.
6
+ */
7
+ import type { MCPClient } from "./types.js";
8
/**
 * Open a connection to an MCP server and return a client for discovering and
 * executing its tools.
 *
 * The transport is chosen from the config:
 * - `command` → stdio (local MCP server processes)
 * - `url` → streamable-http (remote MCP servers like mcp.sanity.io)
 *
 * @param serverConfig - MCP server configuration.
 * @returns A promise for the connected client.
 */
export declare function connectMCP(
    serverConfig: Record<string, unknown>,
): Promise<MCPClient>;
16
/**
 * Substitute `{{env.VAR}}` templates in config values, descending into
 * nested config objects.
 *
 * @param config - Configuration object to process.
 * @returns A configuration object with templates resolved.
 */
export declare function resolveEnvTemplates(
    config: Record<string, unknown>,
): Record<string, unknown>;
@@ -0,0 +1,95 @@
1
+ /**
2
+ * MCP server connection and tool discovery.
3
+ *
4
+ * Handles connecting to an MCP server via streamable-http or stdio transport,
5
+ * discovering available tools, and resolving {{env.VAR}} templates in config.
6
+ */
7
/**
 * Connect to an MCP server and return a client for tool discovery and execution.
 *
 * `{{env.VAR}}` templates in the config are resolved first. The transport is
 * then chosen from the resolved config:
 * - `command` → stdio; the string is split on whitespace into the executable
 *   and its arguments, and the child inherits `process.env`
 * - `url` → streamable-http, with an `Authorization: Bearer …` header when
 *   `auth` is `{ type: "bearer", token }`
 *
 * The tool list is fetched once, right after connecting, and cached.
 *
 * @param serverConfig - MCP server configuration (`command` or `url`, optional `auth`).
 * @returns An object with `getAllTools()` (cached tool list), `callTool(name, args)`
 *   (resolves to `{ content, error }`) and `cleanup()` (closes the transport,
 *   ignoring close failures).
 * @throws Error when neither `command` nor `url` is configured; connection and
 *   tool-discovery failures are rethrown.
 */
export async function connectMCP(serverConfig) {
    const { Client } = await import("@modelcontextprotocol/sdk/client/index.js");
    const client = new Client({
        name: "ailf-mcp-eval",
        version: "1.0.0",
    });
    const resolvedConfig = resolveEnvTemplates(serverConfig);
    let closeTransport;
    if (resolvedConfig.command) {
        const { StdioClientTransport } = await import("@modelcontextprotocol/sdk/client/stdio.js");
        // Trim first so leading whitespace cannot produce an empty executable name.
        const parts = String(resolvedConfig.command).trim().split(/\s+/);
        const transport = new StdioClientTransport({
            command: parts[0],
            args: parts.slice(1),
            env: process.env,
        });
        await client.connect(transport);
        closeTransport = () => transport.close();
    }
    else if (resolvedConfig.url) {
        const { StreamableHTTPClientTransport } = await import("@modelcontextprotocol/sdk/client/streamableHttp.js");
        const headers = {};
        const auth = resolvedConfig.auth;
        if (auth?.type === "bearer" && auth.token) {
            headers["Authorization"] = `Bearer ${auth.token}`;
        }
        const transport = new StreamableHTTPClientTransport(new URL(String(resolvedConfig.url)), { requestInit: { headers } });
        await client.connect(transport);
        closeTransport = () => transport.close();
    }
    else {
        throw new Error("MCP server config must have either 'command' (stdio) or 'url' (http)");
    }
    // Discover tools. The transport is already open here, so a discovery
    // failure must close it before rethrowing; the caller never receives a
    // client it could clean up.
    let toolsList;
    try {
        ({ tools: toolsList } = await client.listTools());
    }
    catch (err) {
        try {
            await closeTransport();
        }
        catch {
            // Best-effort close; the discovery error is the one worth reporting.
        }
        throw err;
    }
    const allTools = toolsList.map((t) => ({
        name: t.name,
        description: t.description,
        inputSchema: t.inputSchema,
    }));
    return {
        getAllTools: () => allTools,
        callTool: async (name, args) => {
            const result = await client.callTool({ name, arguments: args });
            // Flatten the result into one string: text parts as-is, any other
            // part as JSON, joined with newlines.
            let content = "";
            if (result?.content) {
                if (Array.isArray(result.content)) {
                    content = result.content
                        .map((c) => c.text || JSON.stringify(c))
                        .join("\n");
                }
                else {
                    content = String(result.content);
                }
            }
            // When the server flags the call as an error, the flattened
            // content doubles as the error message.
            return { content, error: result?.isError ? content : undefined };
        },
        cleanup: async () => {
            await closeTransport().catch(() => { });
        },
    };
}
76
/**
 * Resolve `{{env.VAR}}` templates in config values, recursively.
 *
 * Strings have every `{{env.NAME}}` occurrence replaced by
 * `process.env.NAME`, or by an empty string when that variable is unset or
 * empty. Nested objects and arrays (including objects and arrays inside
 * arrays) are walked and rebuilt; all other values are copied through
 * unchanged. The input is not mutated.
 *
 * @param config - Configuration object to process.
 * @returns A new object with the same keys and all templates resolved.
 */
export function resolveEnvTemplates(config) {
    const resolveValue = (value) => {
        if (typeof value === "string") {
            return value.replace(/\{\{env\.(\w+)\}\}/g, (_match, varName) => {
                return process.env[varName] || "";
            });
        }
        if (Array.isArray(value)) {
            // Templates inside arrays (e.g. argument lists) are resolved too.
            return value.map(resolveValue);
        }
        if (value && typeof value === "object") {
            return resolveEnvTemplates(value);
        }
        return value;
    };
    const resolved = {};
    for (const [key, value] of Object.entries(config)) {
        resolved[key] = resolveValue(value);
    }
    return resolved;
}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Anthropic multi-turn tool execution loop.
3
+ *
4
+ * Sends a prompt to the Anthropic Messages API with MCP tools attached.
5
+ * When the model calls a tool, executes it via the MCP client, feeds
6
+ * the result back, and continues until the model produces a final text
7
+ * response or maxToolRounds is exhausted.
8
+ */
9
+ import type { ToolLoopConfig, ToolLoopResult } from "./types.js";
10
/**
 * Drive a multi-turn tool loop against the Anthropic Messages API.
 *
 * Flow:
 * 1. The prompt is sent to Claude together with the available tools.
 * 2. When Claude calls tools, they are executed via MCP and the results are
 *    sent back.
 * 3. This repeats until Claude answers with text only or maxToolRounds is hit.
 * 4. The last round omits the tools, forcing a synthesis response.
 *
 * @param config - Tool loop configuration.
 * @returns A promise for the tool loop result.
 */
export declare function runAnthropicToolLoop(
    config: ToolLoopConfig,
): Promise<ToolLoopResult>;