@sanity/ailf 1.0.0 → 2.0.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (499)
  1. package/README.md +0 -1
  2. package/canonical/grader-references/README.md +2 -2
  3. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  4. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  5. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  6. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  7. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  8. package/config/features.ts +1 -1
  9. package/config/models.ts +29 -12
  10. package/config/sources.ts +1 -1
  11. package/config/thresholds.ts +1 -1
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  13. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  17. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  18. package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
  19. package/dist/_vendor/ailf-core/config-helpers.js +51 -2
  20. package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
  21. package/dist/_vendor/ailf-core/examples/index.js +213 -94
  22. package/dist/_vendor/ailf-core/index.d.ts +3 -2
  23. package/dist/_vendor/ailf-core/index.js +2 -1
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  25. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  27. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  28. package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
  29. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  30. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  31. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  32. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  33. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  34. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  35. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  36. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
  37. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
  38. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  39. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  40. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  41. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  42. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  43. package/dist/_vendor/ailf-core/services/index.js +1 -1
  44. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  47. package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
  48. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
  49. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  50. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  51. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  52. package/dist/adapters/api-client/remediation.js +2 -2
  53. package/dist/adapters/config-sources/file-config-adapter.js +7 -1
  54. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  55. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  56. package/dist/adapters/index.d.ts +0 -1
  57. package/dist/adapters/index.js +0 -1
  58. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  59. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  60. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  61. package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
  62. package/dist/adapters/task-sources/index.d.ts +3 -4
  63. package/dist/adapters/task-sources/index.js +3 -4
  64. package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
  65. package/dist/adapters/task-sources/repo-schemas.js +228 -20
  66. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  67. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  68. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  69. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  70. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  71. package/dist/adapters/task-sources/repo-validation.js +126 -5
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
  73. package/dist/adapters/task-sources/task-file-loader.js +21 -7
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/coverage-audit.js +3 -1
  95. package/dist/commands/explain-handler.d.ts +1 -1
  96. package/dist/commands/explain-handler.js +37 -8
  97. package/dist/commands/fetch-docs.js +1 -0
  98. package/dist/commands/generate-configs.d.ts +3 -3
  99. package/dist/commands/generate-configs.js +20 -8
  100. package/dist/commands/init.d.ts +5 -4
  101. package/dist/commands/init.js +190 -25
  102. package/dist/commands/pipeline-action.d.ts +7 -1
  103. package/dist/commands/pipeline-action.js +43 -19
  104. package/dist/commands/pipeline.d.ts +6 -1
  105. package/dist/commands/pipeline.js +7 -2
  106. package/dist/commands/pr-comment.js +1 -0
  107. package/dist/commands/publish.js +1 -0
  108. package/dist/commands/shared/help.js +2 -2
  109. package/dist/commands/update-quality-scores.d.ts +5 -0
  110. package/dist/commands/update-quality-scores.js +20 -0
  111. package/dist/commands/validate-tasks.d.ts +2 -2
  112. package/dist/commands/validate-tasks.js +26 -15
  113. package/dist/composition-root.d.ts +15 -4
  114. package/dist/composition-root.js +100 -55
  115. package/dist/config/features.ts +23 -0
  116. package/dist/config/models.ts +100 -0
  117. package/dist/config/prompts.ts +16 -0
  118. package/dist/config/rubrics.ts +225 -0
  119. package/dist/config/schedules.ts +47 -0
  120. package/dist/config/sinks.ts +37 -0
  121. package/dist/config/sources.ts +21 -0
  122. package/dist/config/thresholds.ts +61 -0
  123. package/dist/index.d.ts +41 -0
  124. package/dist/index.js +48 -0
  125. package/dist/lib/agent-behavior-report.d.ts +8 -0
  126. package/dist/lib/agent-behavior-report.js +185 -0
  127. package/dist/lib/baseline.d.ts +19 -0
  128. package/dist/lib/baseline.js +153 -0
  129. package/dist/lib/calculate-scores.d.ts +23 -0
  130. package/dist/lib/calculate-scores.js +42 -0
  131. package/dist/lib/compare.d.ts +18 -0
  132. package/dist/lib/compare.js +170 -0
  133. package/dist/lib/coverage-audit.d.ts +4 -0
  134. package/dist/lib/coverage-audit.js +42 -0
  135. package/dist/lib/discovery-report.d.ts +13 -0
  136. package/dist/lib/discovery-report.js +57 -0
  137. package/dist/lib/fetch-docs.d.ts +30 -0
  138. package/dist/lib/fetch-docs.js +171 -0
  139. package/dist/lib/generate-configs.d.ts +25 -0
  140. package/dist/lib/generate-configs.js +42 -0
  141. package/dist/lib/grader-api.d.ts +21 -0
  142. package/dist/lib/grader-api.js +34 -0
  143. package/dist/lib/grader-compare.d.ts +19 -0
  144. package/dist/lib/grader-compare.js +91 -0
  145. package/dist/lib/grader-consistency.d.ts +27 -0
  146. package/dist/lib/grader-consistency.js +79 -0
  147. package/dist/lib/grader-sensitivity.d.ts +19 -0
  148. package/dist/lib/grader-sensitivity.js +75 -0
  149. package/dist/lib/grader-validate.d.ts +19 -0
  150. package/dist/lib/grader-validate.js +78 -0
  151. package/dist/lib/measure-retrieval.d.ts +14 -0
  152. package/dist/lib/measure-retrieval.js +71 -0
  153. package/dist/lib/pr-comment.d.ts +16 -0
  154. package/dist/lib/pr-comment.js +28 -0
  155. package/dist/lib/readiness-report.d.ts +13 -0
  156. package/dist/lib/readiness-report.js +108 -0
  157. package/dist/lib/webhook-server.d.ts +11 -0
  158. package/dist/lib/webhook-server.js +24 -0
  159. package/dist/lib/weekly-digest.d.ts +24 -0
  160. package/dist/lib/weekly-digest.js +148 -0
  161. package/dist/orchestration/build-app-context.js +13 -0
  162. package/dist/orchestration/build-step-sequence.js +4 -2
  163. package/dist/orchestration/cache-context.d.ts +23 -0
  164. package/dist/orchestration/cache-context.js +43 -0
  165. package/dist/orchestration/env-bridge.d.ts +21 -0
  166. package/dist/orchestration/env-bridge.js +66 -0
  167. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  168. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  169. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  170. package/dist/orchestration/step-runner.js +5 -1
  171. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  172. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  173. package/dist/orchestration/steps/callback-step.js +10 -1
  174. package/dist/orchestration/steps/compare-step.js +6 -3
  175. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  176. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  177. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  178. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  179. package/dist/orchestration/steps/fetch-docs-step.js +32 -19
  180. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  181. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  182. package/dist/orchestration/steps/generate-configs-step.js +77 -26
  183. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  184. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  185. package/dist/orchestration/steps/publish-report-step.js +19 -0
  186. package/dist/orchestration/steps/readiness-step.js +8 -3
  187. package/dist/orchestration/steps/report-step.js +17 -4
  188. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  189. package/dist/orchestration/steps/run-eval-step.js +51 -31
  190. package/dist/pipeline/agent-behavior-report.js +6 -0
  191. package/dist/pipeline/attribution.d.ts +1 -1
  192. package/dist/pipeline/attribution.js +1 -1
  193. package/dist/pipeline/cache.js +29 -15
  194. package/dist/pipeline/calculate-scores.d.ts +2 -0
  195. package/dist/pipeline/calculate-scores.js +70 -33
  196. package/dist/pipeline/chronic-failures.d.ts +55 -0
  197. package/dist/pipeline/chronic-failures.js +110 -0
  198. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  199. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  200. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  201. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
  202. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  203. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  204. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  205. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  206. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  207. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  208. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  209. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  210. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  211. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  212. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  213. package/dist/pipeline/compiler/config-loader.js +42 -2
  214. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  215. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  216. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  217. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  218. package/dist/pipeline/compiler/index.d.ts +2 -5
  219. package/dist/pipeline/compiler/index.js +2 -5
  220. package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
  221. package/dist/pipeline/compiler/literacy-bridge.js +2 -2
  222. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  223. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  224. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  225. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  226. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  227. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  228. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
  229. package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
  230. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  231. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  232. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  233. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  234. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  235. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  236. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  237. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  238. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  239. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  240. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  241. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  242. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  245. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  246. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  247. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  248. package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
  249. package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
  250. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  251. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  252. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  253. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  254. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  255. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  256. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  257. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  258. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  259. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  260. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  261. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  262. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  263. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  264. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  265. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  266. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  267. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  268. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  269. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  270. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  271. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  272. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  273. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  274. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
  275. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  276. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  277. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  278. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  279. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  280. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  281. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  282. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  283. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  284. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
  285. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  286. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  287. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  288. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  289. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
  290. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
  291. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  292. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
  293. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  294. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
  295. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  296. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  297. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
  298. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
  299. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
  300. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  301. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  302. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  303. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  304. package/dist/pipeline/compiler/preset-loader.js +99 -0
  305. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
  306. package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
  307. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  308. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  309. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  310. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  311. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  312. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  313. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  314. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  315. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  316. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  317. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  318. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  319. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  320. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  321. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  322. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  323. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  324. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  325. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  326. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  327. package/dist/pipeline/compiler/task-bridge.js +92 -0
  328. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  329. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  330. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  331. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  332. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  333. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  334. package/dist/pipeline/coverage-audit.d.ts +1 -1
  335. package/dist/pipeline/coverage-audit.js +1 -1
  336. package/dist/pipeline/degradations.d.ts +1 -1
  337. package/dist/pipeline/degradations.js +1 -1
  338. package/dist/pipeline/expand-tasks.d.ts +2 -2
  339. package/dist/pipeline/expand-tasks.js +2 -2
  340. package/dist/pipeline/failure-modes.d.ts +1 -1
  341. package/dist/pipeline/failure-modes.js +13 -1
  342. package/dist/pipeline/gap-analysis.d.ts +1 -1
  343. package/dist/pipeline/gap-analysis.js +3 -1
  344. package/dist/pipeline/generate-configs.d.ts +2 -2
  345. package/dist/pipeline/generate-configs.js +16 -9
  346. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  347. package/dist/pipeline/grader-compare-runner.js +7 -1
  348. package/dist/pipeline/grader-comparison.d.ts +1 -1
  349. package/dist/pipeline/grader-comparison.js +1 -1
  350. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  351. package/dist/pipeline/grader-consistency-runner.js +7 -1
  352. package/dist/pipeline/grader-consistency.d.ts +1 -1
  353. package/dist/pipeline/grader-consistency.js +1 -1
  354. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  355. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  356. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  357. package/dist/pipeline/grader-sensitivity.js +1 -1
  358. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  359. package/dist/pipeline/grader-validate-runner.js +2 -2
  360. package/dist/pipeline/grader-validation.d.ts +1 -1
  361. package/dist/pipeline/grader-validation.js +1 -1
  362. package/dist/pipeline/map-request-to-config.js +16 -2
  363. package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
  364. package/dist/pipeline/mirror-repo-tasks.js +10 -10
  365. package/dist/pipeline/plan-format.d.ts +1 -1
  366. package/dist/pipeline/plan-format.js +1 -1
  367. package/dist/pipeline/plan.d.ts +1 -1
  368. package/dist/pipeline/plan.js +68 -30
  369. package/dist/pipeline/probe.d.ts +1 -1
  370. package/dist/pipeline/probe.js +1 -1
  371. package/dist/pipeline/readiness-report.d.ts +2 -2
  372. package/dist/pipeline/readiness-report.js +2 -2
  373. package/dist/pipeline/release-classification.d.ts +1 -1
  374. package/dist/pipeline/release-classification.js +1 -1
  375. package/dist/pipeline/release-report.d.ts +1 -1
  376. package/dist/pipeline/release-report.js +1 -1
  377. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  378. package/dist/pipeline/repo-eval-comment.js +1 -1
  379. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  380. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  381. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  382. package/dist/pipeline/resolve-mappings.js +44 -44
  383. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  384. package/dist/pipeline/retrieval-metrics.js +28 -20
  385. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  386. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  387. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  388. package/dist/pipeline/steps/compare-step.js +90 -0
  389. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  390. package/dist/pipeline/steps/eval-step.js +347 -0
  391. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  392. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  393. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  394. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  395. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  396. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  397. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  398. package/dist/pipeline/steps/publish-report-step.js +243 -0
  399. package/dist/pipeline/steps/report-step.d.ts +13 -0
  400. package/dist/pipeline/steps/report-step.js +56 -0
  401. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  402. package/dist/pipeline/steps/update-scores-step.js +42 -0
  403. package/dist/pipeline/targeted-loo.d.ts +1 -1
  404. package/dist/pipeline/targeted-loo.js +1 -1
  405. package/dist/pipeline/thresholds.d.ts +1 -1
  406. package/dist/pipeline/thresholds.js +1 -1
  407. package/dist/pipeline/validate.js +13 -0
  408. package/dist/report-store.d.ts +17 -0
  409. package/dist/report-store.js +24 -0
  410. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  411. package/dist/scripts/agent-behavior-report.js +315 -0
  412. package/dist/scripts/baseline.d.ts +43 -0
  413. package/dist/scripts/baseline.js +267 -0
  414. package/dist/scripts/calculate-scores.d.ts +166 -0
  415. package/dist/scripts/calculate-scores.js +1296 -0
  416. package/dist/scripts/compare.d.ts +22 -0
  417. package/dist/scripts/compare.js +334 -0
  418. package/dist/scripts/coverage-audit.d.ts +44 -0
  419. package/dist/scripts/coverage-audit.js +209 -0
  420. package/dist/scripts/debug-eval.d.ts +19 -0
  421. package/dist/scripts/debug-eval.js +73 -0
  422. package/dist/scripts/discovery-report.d.ts +58 -0
  423. package/dist/scripts/discovery-report.js +250 -0
  424. package/dist/scripts/fetch-docs.d.ts +35 -0
  425. package/dist/scripts/fetch-docs.js +472 -0
  426. package/dist/scripts/generate-configs.d.ts +66 -0
  427. package/dist/scripts/generate-configs.js +459 -0
  428. package/dist/scripts/grader-api.d.ts +27 -0
  429. package/dist/scripts/grader-api.js +206 -0
  430. package/dist/scripts/grader-compare.d.ts +22 -0
  431. package/dist/scripts/grader-compare.js +368 -0
  432. package/dist/scripts/grader-consistency.d.ts +20 -0
  433. package/dist/scripts/grader-consistency.js +313 -0
  434. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  435. package/dist/scripts/grader-sensitivity.js +354 -0
  436. package/dist/scripts/grader-validate.d.ts +19 -0
  437. package/dist/scripts/grader-validate.js +267 -0
  438. package/dist/scripts/measure-retrieval.d.ts +10 -0
  439. package/dist/scripts/measure-retrieval.js +145 -0
  440. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  441. package/dist/scripts/migrate-task-mode.js +1 -1
  442. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  443. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  444. package/dist/scripts/pipeline.d.ts +76 -0
  445. package/dist/scripts/pipeline.js +1031 -0
  446. package/dist/scripts/pr-comment.d.ts +10 -0
  447. package/dist/scripts/pr-comment.js +510 -0
  448. package/dist/scripts/readiness-report.d.ts +88 -0
  449. package/dist/scripts/readiness-report.js +342 -0
  450. package/dist/scripts/update-quality-scores.d.ts +15 -0
  451. package/dist/scripts/update-quality-scores.js +184 -0
  452. package/dist/scripts/validate-task-sources.d.ts +1 -1
  453. package/dist/scripts/validate-task-sources.js +1 -1
  454. package/dist/scripts/validate.d.ts +13 -0
  455. package/dist/scripts/validate.js +79 -0
  456. package/dist/scripts/webhook-server.d.ts +26 -0
  457. package/dist/scripts/webhook-server.js +147 -0
  458. package/dist/scripts/weekly-digest.d.ts +24 -0
  459. package/dist/scripts/weekly-digest.js +144 -0
  460. package/dist/sinks/format-slack.d.ts +64 -0
  461. package/dist/sinks/format-slack.js +306 -0
  462. package/dist/sinks/slack-sink.d.ts +27 -0
  463. package/dist/sinks/slack-sink.js +78 -0
  464. package/dist/sinks/types.d.ts +1 -1
  465. package/dist/sinks/types.js +1 -1
  466. package/dist/sinks/webhook-sink.d.ts +19 -0
  467. package/dist/sinks/webhook-sink.js +50 -0
  468. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  469. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  470. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  471. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  472. package/dist/tasks/literacy/functions.task.ts +70 -0
  473. package/dist/tasks/literacy/groq.task.ts +259 -0
  474. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  475. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  476. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  477. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  478. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  479. package/package.json +32 -24
  480. package/tasks/.expanded.agentic.yaml +280 -0
  481. package/tasks/.expanded.yaml +565 -0
  482. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  483. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  484. package/tasks/literacy/content-lake.task.ts +181 -0
  485. package/tasks/literacy/frameworks.task.ts +1 -0
  486. package/tasks/literacy/functions.task.ts +1 -0
  487. package/tasks/literacy/groq.task.ts +1 -0
  488. package/tasks/literacy/image-handling.task.ts +95 -0
  489. package/tasks/literacy/nextjs-live.task.ts +2 -1
  490. package/tasks/literacy/portable-text.task.ts +169 -0
  491. package/tasks/literacy/studio-setup.task.ts +5 -2
  492. package/tasks/literacy/visual-editing.task.ts +1 -0
  493. package/LICENSE +0 -21
  494. package/tasks/frameworks.yaml +0 -98
  495. package/tasks/functions.yaml +0 -51
  496. package/tasks/groq.yaml +0 -216
  497. package/tasks/nextjs-live.yaml +0 -62
  498. package/tasks/studio-setup.yaml +0 -111
  499. package/tasks/visual-editing.yaml +0 -120
@@ -0,0 +1,33 @@
1
+ /**
2
+ * MCPToolProvider — Custom Promptfoo provider for MCP tool-use evaluation.
3
+ *
4
+ * Orchestrates the MCP evaluation flow:
5
+ * 1. Connects to the MCP server and discovers available tools
6
+ * 2. Selects the appropriate LLM backend based on model ID prefix
7
+ * 3. Delegates the multi-turn tool loop to the backend
8
+ * 4. Formats the result for Promptfoo (including tool call summary)
9
+ *
10
+ * Promptfoo config usage:
11
+ *
12
+ * providers:
13
+ * - id: file://dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js
14
+ * label: "Claude Opus 4.6 + MCP"
15
+ * config:
16
+ * model: anthropic:messages:claude-opus-4-6
17
+ * maxToolRounds: 5
18
+ * temperature: 0.2
19
+ * max_tokens: 4096
20
+ * mcpServer:
21
+ * url: https://mcp.sanity.io
22
+ * auth: { type: bearer, token: "{{env.SANITY_API_TOKEN}}" }
23
+ * name: mcp-live-query-documents
24
+ * mcpTools: [query_documents, get_schema]
25
+ */
26
+ import type { CallApiContextParams, ProviderOptions, ProviderResponse } from "./types.js";
27
+ export default class MCPToolProvider {
28
+ config: Record<string, unknown>;
29
+ private providerId;
30
+ constructor(options?: ProviderOptions);
31
+ id(): string;
32
+ callApi(prompt: string, _context?: CallApiContextParams): Promise<ProviderResponse>;
33
+ }
@@ -0,0 +1,191 @@
1
+ /**
2
+ * MCPToolProvider — Custom Promptfoo provider for MCP tool-use evaluation.
3
+ *
4
+ * Orchestrates the MCP evaluation flow:
5
+ * 1. Connects to the MCP server and discovers available tools
6
+ * 2. Selects the appropriate LLM backend based on model ID prefix
7
+ * 3. Delegates the multi-turn tool loop to the backend
8
+ * 4. Formats the result for Promptfoo (including tool call summary)
9
+ *
10
+ * Promptfoo config usage:
11
+ *
12
+ * providers:
13
+ * - id: file://dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js
14
+ * label: "Claude Opus 4.6 + MCP"
15
+ * config:
16
+ * model: anthropic:messages:claude-opus-4-6
17
+ * maxToolRounds: 5
18
+ * temperature: 0.2
19
+ * max_tokens: 4096
20
+ * mcpServer:
21
+ * url: https://mcp.sanity.io
22
+ * auth: { type: bearer, token: "{{env.SANITY_API_TOKEN}}" }
23
+ * name: mcp-live-query-documents
24
+ * mcpTools: [query_documents, get_schema]
25
+ */
import { fileURLToPath } from "node:url";
import { config as loadDotenv } from "dotenv";
import { connectMCP } from "./mcp-connection.js";
import { runAnthropicToolLoop } from "./tool-loop-anthropic.js";
import { runOpenAIToolLoop } from "./tool-loop-openai.js";
// Load the `.env` file located five directories above this module before any
// API key is read from process.env.
//
// The file URL is converted with fileURLToPath instead of reading
// `URL#pathname`: `pathname` stays percent-encoded (a space becomes "%20")
// and keeps a leading slash in front of Windows drive letters, so the file
// would not be found in such checkouts.
//
// `override: true` lets values from the file replace variables that are
// already set in the environment.
loadDotenv({
    override: true,
    path: fileURLToPath(new URL("../../../../../.env", import.meta.url)),
});
34
+ // ---------------------------------------------------------------------------
35
+ // Backend registry — maps model ID prefixes to tool loop implementations
36
+ // ---------------------------------------------------------------------------
/** Tool-loop implementation for each supported model-ID prefix. */
const BACKENDS = {
    anthropic: runAnthropicToolLoop,
    openai: runOpenAIToolLoop,
};
/**
 * Resolve the LLM backend from a model ID.
 *
 * Model IDs follow the pattern `provider:type:model-name` (e.g.,
 * `anthropic:messages:claude-opus-4-6`). The first segment determines
 * which backend handles the tool loop. For OpenAI, the second segment
 * determines the API variant (`chat` → Chat Completions, `responses` →
 * Responses API).
 *
 * @param modelId Colon-separated model identifier.
 * @returns `backend` (the tool-loop function), `modelName` (everything after
 *   the second colon, or the last segment when there are fewer than three
 *   segments) and `apiVariant` (`"chat"`, `"responses"` or `undefined`).
 * @throws Error when the first segment is not a key of `BACKENDS`.
 */
function resolveBackend(modelId) {
    const parts = modelId.split(":");
    const prefix = parts[0];
    // Only own keys count. A bare `BACKENDS[prefix]` lookup would also return
    // members inherited from Object.prototype ("constructor", "toString", …)
    // and silently accept them as a backend.
    const backend = Object.prototype.hasOwnProperty.call(BACKENDS, prefix)
        ? BACKENDS[prefix]
        : undefined;
    if (!backend) {
        const supported = Object.keys(BACKENDS).join(", ");
        throw new Error(`No backend for model "${modelId}". Supported prefixes: ${supported}`);
    }
    // Extract the model name for the API (e.g., "claude-opus-4-6" from
    // "anthropic:messages:claude-opus-4-6"). Re-joining keeps model names that
    // themselves contain colons intact.
    const modelName = parts.length > 2 ? parts.slice(2).join(":") : parts[parts.length - 1];
    // For OpenAI, the second segment selects the API variant; anything other
    // than "responses" or "chat" leaves it undefined.
    let apiVariant;
    if (prefix === "openai" && parts.length > 2) {
        const variant = parts[1];
        if (variant === "responses" || variant === "chat") {
            apiVariant = variant;
        }
    }
    return { backend, modelName, apiVariant };
}
70
+ // ---------------------------------------------------------------------------
71
+ // Helpers
72
+ // ---------------------------------------------------------------------------
/**
 * Tag the response text with the names of the MCP tools that were called.
 *
 * The names are written as a JSON array inside a trailing HTML comment
 * (`<!-- MCP_TOOLS_CALLED: [...] -->`), separated from the text by a blank
 * line. When the log is empty the text is returned unchanged.
 */
function appendToolSummary(text, log) {
    if (log.length !== 0) {
        const toolNames = log.map(({ name }) => name);
        const marker = `<!-- MCP_TOOLS_CALLED: ${JSON.stringify(toolNames)} -->`;
        return `${text}\n\n${marker}`;
    }
    return text;
}
/**
 * Look up the API key to use for a model-ID prefix.
 *
 * A truthy `config.apiKey` always wins and is returned as a string.
 * Otherwise the key is read from the environment variable associated with
 * the prefix (`ANTHROPIC_API_KEY` or `OPENAI_API_KEY`). The result is
 * `undefined` when the prefix has no associated variable or the variable is
 * not set.
 */
function resolveApiKey(prefix, config) {
    const explicitKey = config.apiKey;
    if (explicitKey) {
        return String(explicitKey);
    }
    const envVarByPrefix = {
        anthropic: "ANTHROPIC_API_KEY",
        openai: "OPENAI_API_KEY",
    };
    const envVarName = envVarByPrefix[prefix];
    if (!envVarName) {
        return undefined;
    }
    return process.env[envVarName];
}
91
+ // ---------------------------------------------------------------------------
92
+ // Provider class
93
+ // ---------------------------------------------------------------------------
/**
 * Custom Promptfoo provider that lets an LLM answer a prompt with the help of
 * tools exposed by an MCP server.
 */
export default class MCPToolProvider {
    /** Provider settings taken from `options.config` (empty object when absent). */
    config;
    /** Identifier returned by `id()`. */
    providerId;
    /**
     * @param options Provider options. `options.config` holds the provider
     *   settings; `options.id` replaces the default ID "mcp-tool-provider".
     */
    constructor(options = {}) {
        this.config = options.config || {};
        this.providerId = options.id || "mcp-tool-provider";
    }
    /** @returns The provider identifier. */
    id() {
        return this.providerId;
    }
    /**
     * Run one evaluation: connect to the MCP server, run the backend's tool
     * loop for the prompt and format the outcome.
     *
     * Configuration problems (missing `mcpServer`, unsupported model prefix,
     * missing API key, MCP connection failure, no tools left after filtering)
     * are reported as `{ error, output: undefined }`. Errors thrown by the
     * tool loop itself are not caught here and reject the returned promise.
     *
     * @param prompt Prompt text handed to the backend.
     * @param _context Unused.
     * @returns On success: `output` (answer text plus tool summary marker),
     *   `tokenUsage`, `metadata` and a `cost` of 0.
     */
    async callApi(prompt, _context) {
        const mcpServerConfig = this.config.mcpServer;
        if (!mcpServerConfig) {
            return { error: "mcpServer config is required", output: undefined };
        }
        // Resolve model and backend
        const modelId = this.config.model || "anthropic:messages:claude-opus-4-6";
        let backend;
        let modelName;
        let apiVariant;
        try {
            const resolved = resolveBackend(modelId);
            backend = resolved.backend;
            modelName = resolved.modelName;
            apiVariant = resolved.apiVariant;
        }
        catch (err) {
            return {
                error: err instanceof Error ? err.message : String(err),
                output: undefined,
            };
        }
        // Resolve API key
        const prefix = modelId.split(":")[0];
        const apiKey = resolveApiKey(prefix, this.config);
        if (!apiKey) {
            return {
                error: `API key not found for ${prefix}. Set ${prefix.toUpperCase()}_API_KEY in env or config.apiKey.`,
                output: undefined,
            };
        }
        // Connect to MCP server
        let mcpClient;
        try {
            mcpClient = await connectMCP(mcpServerConfig);
        }
        catch (err) {
            return {
                error: `Failed to connect to MCP server: ${err instanceof Error ? err.message : String(err)}`,
                output: undefined,
            };
        }
        try {
            // Restrict the discovered tools to the names listed in `mcpTools`
            // (all tools are used when no filter is configured).
            const allTools = mcpClient.getAllTools();
            const toolFilter = this.config.mcpTools;
            const tools = toolFilter
                ? allTools.filter((t) => toolFilter.includes(t.name))
                : allTools;
            if (tools.length === 0) {
                return {
                    error: "No MCP tools available after filtering. Check mcpTools config and server capabilities.",
                    output: undefined,
                };
            }
            // Run the tool loop
            const result = await backend({
                prompt,
                tools,
                callTool: mcpClient.callTool,
                // `??` rather than `||`: an explicit `maxToolRounds: 0` must
                // be passed through instead of being replaced by the default.
                maxToolRounds: this.config.maxToolRounds ?? 5,
                model: modelName,
                temperature: this.config.temperature ?? 0.2,
                // First configured token limit wins; 4096 when none is set.
                maxTokens: this.config.max_output_tokens ||
                    this.config.max_completion_tokens ||
                    this.config.max_tokens ||
                    4096,
                apiKey,
                apiVariant,
                providerConfig: this.config,
            });
            return {
                cost: 0,
                metadata: {
                    toolRounds: result.toolRounds,
                    toolCallLog: result.toolCallLog,
                    exhaustedRounds: result.exhaustedRounds,
                    latencyMs: result.latencyMs,
                },
                output: appendToolSummary(result.output, result.toolCallLog),
                tokenUsage: result.tokenUsage,
            };
        }
        finally {
            // Best-effort disconnect; a cleanup failure must not mask the result.
            await mcpClient.cleanup().catch(() => { });
        }
    }
}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * MCP server connection and tool discovery.
3
+ *
4
+ * Handles connecting to an MCP server via streamable-http or stdio transport,
5
+ * discovering available tools, and resolving {{env.VAR}} templates in config.
6
+ */
7
+ import type { MCPClient } from "./types.js";
8
+ /**
9
+ * Connect to an MCP server and return a client for tool discovery and execution.
10
+ *
11
+ * Supports two transport types:
12
+ * - `url` → streamable-http (remote MCP servers like mcp.sanity.io)
13
+ * - `command` → stdio (local MCP server processes)
14
+ */
15
+ export declare function connectMCP(serverConfig: Record<string, unknown>): Promise<MCPClient>;
16
+ /**
17
+ * Resolve `{{env.VAR}}` templates in config values, recursively.
18
+ */
19
+ export declare function resolveEnvTemplates(config: Record<string, unknown>): Record<string, unknown>;
@@ -0,0 +1,101 @@
1
+ /**
2
+ * MCP server connection and tool discovery.
3
+ *
4
+ * Handles connecting to an MCP server via streamable-http or stdio transport,
5
+ * discovering available tools, and resolving {{env.VAR}} templates in config.
6
+ */
/**
 * Connect to an MCP server and return a client for tool discovery and execution.
 *
 * Supports two transport types:
 * - `url` → streamable-http (remote MCP servers like mcp.sanity.io)
 * - `command` → stdio (local MCP server processes)
 *
 * `{{env.VAR}}` templates in the config are resolved before it is used. If
 * connecting or listing the tools fails, the transport is closed before the
 * error is rethrown, so a failed connection does not leave a child process
 * or HTTP session behind.
 *
 * @param serverConfig Server settings; `command` takes precedence over `url`.
 *   For `url`, an `auth` of `{ type: "bearer", token }` becomes an
 *   Authorization header, and `headers` entries override it.
 * @returns An object with `getAllTools()` (tools discovered at connect time),
 *   `callTool(name, args)` and `cleanup()`.
 * @throws Error when the config has neither `command` nor `url`, or when
 *   connecting / tool discovery fails.
 */
export async function connectMCP(serverConfig) {
    const { Client } = await import("@modelcontextprotocol/sdk/client/index.js");
    const client = new Client({
        name: "ailf-mcp-eval",
        version: "1.0.0",
    });
    const resolvedConfig = resolveEnvTemplates(serverConfig);
    let transport;
    if (resolvedConfig.command) {
        const { StdioClientTransport } = await import("@modelcontextprotocol/sdk/client/stdio.js");
        // The command line is split on whitespace (no quoting support).
        // Trimming first avoids an empty executable name when the string
        // starts with whitespace.
        const parts = String(resolvedConfig.command).trim().split(/\s+/);
        transport = new StdioClientTransport({
            command: parts[0],
            args: parts.slice(1),
            env: process.env,
        });
    }
    else if (resolvedConfig.url) {
        const { StreamableHTTPClientTransport } = await import("@modelcontextprotocol/sdk/client/streamableHttp.js");
        const headers = {};
        // Auth-derived headers (structured auth config)
        const auth = resolvedConfig.auth;
        if (auth?.type === "bearer" && auth.token) {
            headers["Authorization"] = `Bearer ${auth.token}`;
        }
        // Explicit headers override auth-derived ones
        const customHeaders = resolvedConfig.headers;
        if (customHeaders) {
            Object.assign(headers, customHeaders);
        }
        transport = new StreamableHTTPClientTransport(new URL(String(resolvedConfig.url)), { requestInit: { headers } });
    }
    else {
        throw new Error("MCP server config must have either 'command' (stdio) or 'url' (http)");
    }
    // Best-effort close shared by the failure path and `cleanup()`.
    const closeTransport = async () => {
        try {
            await transport.close();
        }
        catch {
            // Ignored on purpose: closing is best-effort.
        }
    };
    // Connect and discover tools. On failure the caller never receives
    // `cleanup`, so the transport has to be released here.
    let allTools;
    try {
        await client.connect(transport);
        const { tools: toolsList } = await client.listTools();
        allTools = toolsList.map((t) => ({
            name: t.name,
            description: t.description,
            inputSchema: t.inputSchema,
        }));
    }
    catch (err) {
        await closeTransport();
        throw err;
    }
    return {
        getAllTools: () => allTools,
        callTool: async (name, args) => {
            const result = await client.callTool({ name, arguments: args });
            // Flatten the result content to one string: array items contribute
            // their `text` (or their JSON form when `text` is empty/absent),
            // joined by newlines; any other content is stringified.
            let content = "";
            if (result?.content) {
                if (Array.isArray(result.content)) {
                    content = result.content
                        .map((c) => c.text || JSON.stringify(c))
                        .join("\n");
                }
                else {
                    content = String(result.content);
                }
            }
            // When the server flags the call as failed, the flattened content
            // doubles as the error message.
            return { content, error: result?.isError ? content : undefined };
        },
        cleanup: closeTransport,
    };
}
/**
 * Resolve `{{env.VAR}}` templates in config values, recursively.
 *
 * Strings have every `{{env.NAME}}` occurrence replaced by
 * `process.env.NAME` (empty string when unset or empty). Nested objects and
 * array elements are processed the same way; all other values are copied
 * as-is. The input is not modified.
 *
 * @param config Object whose values may contain templates.
 * @returns A new object with the same keys and resolved values.
 */
export function resolveEnvTemplates(config) {
    const resolveValue = (value) => {
        if (typeof value === "string") {
            return value.replace(/\{\{env\.(\w+)\}\}/g, (_match, varName) => process.env[varName] || "");
        }
        if (Array.isArray(value)) {
            // Arrays (e.g. argument lists) can hold templates too.
            return value.map((item) => resolveValue(item));
        }
        if (value && typeof value === "object") {
            return resolveEnvTemplates(value);
        }
        return value;
    };
    const resolved = {};
    for (const [key, value] of Object.entries(config)) {
        resolved[key] = resolveValue(value);
    }
    return resolved;
}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Anthropic multi-turn tool execution loop.
3
+ *
4
+ * Sends a prompt to the Anthropic Messages API with MCP tools attached.
5
+ * When the model calls a tool, executes it via the MCP client, feeds
6
+ * the result back, and continues until the model produces a final text
7
+ * response or maxToolRounds is exhausted.
8
+ */
9
+ import type { ToolLoopConfig, ToolLoopResult } from "./types.js";
10
+ /**
11
+ * Run a multi-turn tool loop using the Anthropic Messages API.
12
+ *
13
+ * The loop:
14
+ * 1. Sends the prompt with available tools to Claude
15
+ * 2. If Claude calls tools → executes them via MCP, sends results back
16
+ * 3. Repeats until Claude produces a text-only response or maxToolRounds is hit
17
+ * 4. On the last round, omits tools to force a synthesis response
18
+ */
19
+ export declare function runAnthropicToolLoop(config: ToolLoopConfig): Promise<ToolLoopResult>;
@@ -0,0 +1,172 @@
1
+ /**
2
+ * Anthropic multi-turn tool execution loop.
3
+ *
4
+ * Sends a prompt to the Anthropic Messages API with MCP tools attached.
5
+ * When the model calls a tool, executes it via the MCP client, feeds
6
+ * the result back, and continues until the model produces a final text
7
+ * response or maxToolRounds is exhausted.
8
+ */
9
+ // ---------------------------------------------------------------------------
10
+ // Tool loop implementation
11
+ // ---------------------------------------------------------------------------
/**
 * Run a multi-turn tool loop using the Anthropic Messages API.
 *
 * The loop:
 * 1. Sends the prompt with available tools to Claude
 * 2. If Claude calls tools → executes them via MCP, sends results back
 * 3. Repeats until Claude produces a text-only response or maxToolRounds is hit
 * 4. On the last round, omits tools to force a synthesis response
 *
 * @param config Loop settings: `prompt`, `tools`, `callTool`, `maxToolRounds`,
 *   `model`, `temperature`, `maxTokens` and `apiKey`.
 * @returns `output` (text blocks of the final response joined by newlines),
 *   `toolCallLog`, accumulated `tokenUsage`, `toolRounds`, `latencyMs`, and
 *   `exhaustedRounds: true` when the answer had to be forced after every
 *   tool round was used.
 * @throws Error when the API reports an error, answers with a non-success
 *   status, or returns a body that is not JSON.
 */
export async function runAnthropicToolLoop(config) {
    const { prompt, tools, callTool, maxToolRounds, model, temperature, maxTokens, apiKey, } = config;
    // Translate MCP tool descriptors into the Anthropic tool format, filling
    // in a description and an empty object schema where the server gave none.
    const anthropicTools = tools.map((t) => ({
        name: t.name,
        description: t.description || `MCP tool: ${t.name}`,
        input_schema: t.inputSchema || { type: "object", properties: {} },
    }));
    const systemPrompt = "You are an AI assistant with access to tools provided by an MCP server. " +
        "Use the available tools to complete the task. Call tools with correct parameters, " +
        "interpret responses, and provide a complete answer.";
    const messages = [{ content: prompt, role: "user" }];
    let inputTokens = 0;
    let outputTokens = 0;
    const startTime = Date.now();
    const toolCallLog = [];
    // Single place that assembles the result for every exit path.
    const buildResult = (output, toolRounds, exhaustedRounds = false) => ({
        output,
        toolCallLog,
        tokenUsage: {
            prompt: inputTokens,
            completion: outputTokens,
            total: inputTokens + outputTokens,
        },
        toolRounds,
        ...(exhaustedRounds ? { exhaustedRounds: true } : {}),
        latencyMs: Date.now() - startTime,
    });
    for (let round = 0; round <= maxToolRounds; round++) {
        const isLastRound = round === maxToolRounds;
        // Reaching the last round after at least one tool round means the
        // model asked for tools in every earlier round: the budget is spent
        // and the answer below is a forced synthesis.
        const forcedSynthesis = isLastRound && round > 0;
        // On the last round, omit tools to force a text-only response.
        // NOTE(review): the original comment states that Anthropic has no
        // `tool_choice: "none"` — confirm against the current API reference.
        if (isLastRound) {
            const lastMsg = messages[messages.length - 1];
            const synthesisText = "You've used the tools available. Based on the information gathered, " +
                "provide your complete, final answer now.";
            if (lastMsg?.role === "user" && Array.isArray(lastMsg.content)) {
                // Append to the pending tool-result message rather than
                // adding a second consecutive user message.
                lastMsg.content.push({
                    type: "text",
                    text: synthesisText,
                });
            }
            else {
                messages.push({ content: synthesisText, role: "user" });
            }
        }
        const body = {
            max_tokens: maxTokens,
            messages,
            model,
            system: systemPrompt,
            temperature,
        };
        if (!isLastRound) {
            body.tools = anthropicTools;
        }
        const response = await fetch("https://api.anthropic.com/v1/messages", {
            body: JSON.stringify(body),
            headers: {
                "anthropic-version": "2023-06-01",
                "Content-Type": "application/json",
                "x-api-key": apiKey,
            },
            method: "POST",
        });
        // A gateway error page or truncated body is not JSON; report the HTTP
        // status instead of a bare JSON parse failure.
        let data;
        try {
            data = await response.json();
        }
        catch {
            throw new Error(`Anthropic API returned a non-JSON response (HTTP ${response.status})`);
        }
        if (data?.error) {
            throw new Error(data.error.message ??
                `Anthropic API error: ${JSON.stringify(data.error)}`);
        }
        if (!response.ok) {
            throw new Error(`Anthropic API request failed with HTTP ${response.status}`);
        }
        inputTokens += data?.usage?.input_tokens ?? 0;
        outputTokens += data?.usage?.output_tokens ?? 0;
        if (!data?.content?.length) {
            return buildResult("", round, forcedSynthesis);
        }
        // Add assistant response to history
        messages.push({ content: data.content, role: "assistant" });
        // Check if model wants to use tools
        const toolUseBlocks = data.content.filter((b) => b.type === "tool_use");
        if (data.stop_reason !== "tool_use" || toolUseBlocks.length === 0) {
            // Model is done — extract text
            const output = data.content
                .filter((b) => b.type === "text")
                .map((b) => b.text || "")
                .join("\n");
            return buildResult(output, round, forcedSynthesis);
        }
        // Execute each tool call via MCP, one after another. A failing call
        // is reported back to the model as a JSON error payload instead of
        // aborting the loop.
        const toolResults = [];
        for (const toolUse of toolUseBlocks) {
            const toolName = toolUse.name;
            const toolInput = (toolUse.input || {});
            try {
                const result = await callTool(toolName, toolInput);
                const content = result.error
                    ? JSON.stringify({ error: result.error })
                    : result.content;
                toolCallLog.push({ name: toolName, input: toolInput, output: content });
                toolResults.push({
                    content,
                    tool_use_id: toolUse.id,
                    type: "tool_result",
                });
            }
            catch (err) {
                const errMsg = err instanceof Error ? err.message : String(err);
                toolCallLog.push({
                    name: toolName,
                    input: toolInput,
                    output: `Error: ${errMsg}`,
                });
                toolResults.push({
                    content: JSON.stringify({ error: errMsg }),
                    tool_use_id: toolUse.id,
                    type: "tool_result",
                });
            }
        }
        // Add tool results to conversation
        messages.push({ content: toolResults, role: "user" });
    }
    // Fallback for the case where the loop ends without returning (for
    // example when maxToolRounds is negative, so no request is ever sent):
    // use the text of the last assistant message, if there is one.
    const lastAssistant = [...messages]
        .reverse()
        .find((m) => m.role === "assistant");
    let lastText = "";
    if (lastAssistant && Array.isArray(lastAssistant.content)) {
        lastText = lastAssistant.content
            .filter((b) => b.type === "text")
            .map((b) => b.text || "")
            .join("\n");
    }
    return buildResult(lastText || "[Exhausted tool rounds without final answer]", maxToolRounds, true);
}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * OpenAI multi-turn tool execution loop.
3
+ *
4
+ * Supports two OpenAI API surfaces:
5
+ * - **Chat Completions** (`/v1/chat/completions`) — used by `openai:chat:*` models
6
+ * - **Responses** (`/v1/responses`) — used by `openai:responses:*` models (GPT-5.x)
7
+ *
8
+ * Both follow the same loop pattern: send prompt → model calls tools → execute
9
+ * via MCP → feed results back → repeat until final text or maxToolRounds.
10
+ */
11
+ import type { ToolLoopConfig, ToolLoopResult } from "./types.js";
12
+ /**
13
+ * Run a multi-turn tool loop using the OpenAI API.
14
+ *
15
+ * Routes to Chat Completions or Responses API based on `config.apiVariant`:
16
+ * - `"responses"` → Responses API (`/v1/responses`)
17
+ * - `"chat"` or undefined → Chat Completions API (`/v1/chat/completions`)
18
+ */
19
+ export declare function runOpenAIToolLoop(config: ToolLoopConfig): Promise<ToolLoopResult>;