@sanity/ailf 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (442) hide show
  1. package/canonical/grader-references/README.md +2 -2
  2. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  3. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  4. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  5. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  6. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  7. package/config/features.ts +1 -1
  8. package/config/models.ts +28 -23
  9. package/config/sources.ts +1 -1
  10. package/config/thresholds.ts +1 -1
  11. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  13. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  17. package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
  18. package/dist/_vendor/ailf-core/config-helpers.js +29 -0
  19. package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
  20. package/dist/_vendor/ailf-core/examples/index.js +208 -114
  21. package/dist/_vendor/ailf-core/index.d.ts +1 -0
  22. package/dist/_vendor/ailf-core/index.js +1 -0
  23. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  25. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  27. package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
  28. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  29. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  30. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  31. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  32. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  33. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
  34. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
  35. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  36. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  37. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  38. package/dist/_vendor/ailf-core/services/index.js +1 -1
  39. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  40. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
  41. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  42. package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
  43. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
  44. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  45. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  46. package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
  47. package/dist/_vendor/ailf-tasks/cli.js +61 -0
  48. package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
  49. package/dist/_vendor/ailf-tasks/index.js +16 -0
  50. package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
  51. package/dist/_vendor/ailf-tasks/parser.js +73 -0
  52. package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
  53. package/dist/_vendor/ailf-tasks/schemas.js +180 -0
  54. package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
  55. package/dist/_vendor/ailf-tasks/validation.js +162 -0
  56. package/dist/adapters/api-client/remediation.js +2 -2
  57. package/dist/adapters/config-sources/file-config-adapter.js +6 -1
  58. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  59. package/dist/adapters/index.d.ts +0 -1
  60. package/dist/adapters/index.js +0 -1
  61. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  62. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  63. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  64. package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
  65. package/dist/adapters/task-sources/index.d.ts +1 -2
  66. package/dist/adapters/task-sources/index.js +1 -2
  67. package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
  68. package/dist/adapters/task-sources/repo-schemas.js +2 -2
  69. package/dist/adapters/task-sources/repo-task-source.js +1 -1
  70. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  71. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
  73. package/dist/adapters/task-sources/task-file-loader.js +20 -6
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/explain-handler.d.ts +1 -1
  95. package/dist/commands/explain-handler.js +37 -8
  96. package/dist/commands/fetch-docs.js +1 -0
  97. package/dist/commands/generate-configs.d.ts +3 -3
  98. package/dist/commands/generate-configs.js +20 -8
  99. package/dist/commands/init.d.ts +2 -3
  100. package/dist/commands/init.js +56 -170
  101. package/dist/commands/pipeline-action.d.ts +7 -1
  102. package/dist/commands/pipeline-action.js +43 -19
  103. package/dist/commands/pipeline.d.ts +6 -1
  104. package/dist/commands/pipeline.js +7 -2
  105. package/dist/commands/pr-comment.js +1 -0
  106. package/dist/commands/publish.js +1 -0
  107. package/dist/commands/shared/help.js +2 -2
  108. package/dist/commands/update-quality-scores.d.ts +5 -0
  109. package/dist/commands/update-quality-scores.js +20 -0
  110. package/dist/composition-root.d.ts +2 -3
  111. package/dist/composition-root.js +27 -14
  112. package/dist/config/features.ts +23 -0
  113. package/dist/config/models.ts +100 -0
  114. package/dist/config/prompts.ts +16 -0
  115. package/dist/config/rubrics.ts +225 -0
  116. package/dist/config/schedules.ts +47 -0
  117. package/dist/config/sinks.ts +37 -0
  118. package/dist/config/sources.ts +21 -0
  119. package/dist/config/thresholds.ts +61 -0
  120. package/dist/lib/agent-behavior-report.d.ts +8 -0
  121. package/dist/lib/agent-behavior-report.js +185 -0
  122. package/dist/lib/baseline.d.ts +19 -0
  123. package/dist/lib/baseline.js +153 -0
  124. package/dist/lib/calculate-scores.d.ts +23 -0
  125. package/dist/lib/calculate-scores.js +42 -0
  126. package/dist/lib/compare.d.ts +18 -0
  127. package/dist/lib/compare.js +170 -0
  128. package/dist/lib/coverage-audit.d.ts +4 -0
  129. package/dist/lib/coverage-audit.js +42 -0
  130. package/dist/lib/discovery-report.d.ts +13 -0
  131. package/dist/lib/discovery-report.js +57 -0
  132. package/dist/lib/fetch-docs.d.ts +30 -0
  133. package/dist/lib/fetch-docs.js +171 -0
  134. package/dist/lib/generate-configs.d.ts +25 -0
  135. package/dist/lib/generate-configs.js +42 -0
  136. package/dist/lib/grader-api.d.ts +21 -0
  137. package/dist/lib/grader-api.js +34 -0
  138. package/dist/lib/grader-compare.d.ts +19 -0
  139. package/dist/lib/grader-compare.js +91 -0
  140. package/dist/lib/grader-consistency.d.ts +27 -0
  141. package/dist/lib/grader-consistency.js +79 -0
  142. package/dist/lib/grader-sensitivity.d.ts +19 -0
  143. package/dist/lib/grader-sensitivity.js +75 -0
  144. package/dist/lib/grader-validate.d.ts +19 -0
  145. package/dist/lib/grader-validate.js +78 -0
  146. package/dist/lib/measure-retrieval.d.ts +14 -0
  147. package/dist/lib/measure-retrieval.js +71 -0
  148. package/dist/lib/pr-comment.d.ts +16 -0
  149. package/dist/lib/pr-comment.js +28 -0
  150. package/dist/lib/readiness-report.d.ts +13 -0
  151. package/dist/lib/readiness-report.js +108 -0
  152. package/dist/lib/webhook-server.d.ts +11 -0
  153. package/dist/lib/webhook-server.js +24 -0
  154. package/dist/lib/weekly-digest.d.ts +24 -0
  155. package/dist/lib/weekly-digest.js +148 -0
  156. package/dist/orchestration/build-app-context.js +13 -0
  157. package/dist/orchestration/cache-context.d.ts +23 -0
  158. package/dist/orchestration/cache-context.js +43 -0
  159. package/dist/orchestration/env-bridge.d.ts +21 -0
  160. package/dist/orchestration/env-bridge.js +66 -0
  161. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  162. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  163. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  164. package/dist/orchestration/step-runner.js +5 -1
  165. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  166. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  167. package/dist/orchestration/steps/callback-step.js +10 -1
  168. package/dist/orchestration/steps/compare-step.js +6 -3
  169. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  170. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  171. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  172. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  173. package/dist/orchestration/steps/fetch-docs-step.js +30 -16
  174. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  175. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  176. package/dist/orchestration/steps/generate-configs-step.js +50 -15
  177. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  178. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  179. package/dist/orchestration/steps/publish-report-step.js +19 -0
  180. package/dist/orchestration/steps/readiness-step.js +8 -3
  181. package/dist/orchestration/steps/report-step.js +17 -4
  182. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  183. package/dist/orchestration/steps/run-eval-step.js +51 -31
  184. package/dist/pipeline/agent-behavior-report.js +6 -0
  185. package/dist/pipeline/attribution.d.ts +1 -1
  186. package/dist/pipeline/attribution.js +1 -1
  187. package/dist/pipeline/cache.js +29 -15
  188. package/dist/pipeline/calculate-scores.d.ts +2 -0
  189. package/dist/pipeline/calculate-scores.js +70 -33
  190. package/dist/pipeline/chronic-failures.d.ts +55 -0
  191. package/dist/pipeline/chronic-failures.js +110 -0
  192. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
  193. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  194. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  195. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  196. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  197. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  198. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  199. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  200. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  201. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  202. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  203. package/dist/pipeline/compiler/config-loader.js +42 -2
  204. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  205. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  206. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  207. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  208. package/dist/pipeline/compiler/index.d.ts +2 -5
  209. package/dist/pipeline/compiler/index.js +2 -5
  210. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  211. package/dist/pipeline/compiler/literacy-bridge.js +1 -1
  212. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
  213. package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
  214. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
  215. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
  216. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
  217. package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
  218. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
  219. package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
  220. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
  221. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
  222. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
  223. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
  224. package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
  225. package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
  226. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
  227. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
  228. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
  229. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
  230. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
  231. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
  232. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
  233. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
  234. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
  235. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
  237. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
  241. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
  242. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
  244. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  250. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
  251. package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
  252. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  253. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  254. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  255. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  256. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  257. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  258. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  259. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  260. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  261. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  262. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  263. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  264. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  265. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  266. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  267. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  268. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  269. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  270. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  271. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  272. package/dist/pipeline/compiler/task-bridge.js +92 -0
  273. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  274. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  275. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  276. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  277. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  278. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  279. package/dist/pipeline/coverage-audit.d.ts +1 -1
  280. package/dist/pipeline/coverage-audit.js +1 -1
  281. package/dist/pipeline/degradations.d.ts +1 -1
  282. package/dist/pipeline/degradations.js +1 -1
  283. package/dist/pipeline/failure-modes.d.ts +1 -1
  284. package/dist/pipeline/failure-modes.js +13 -1
  285. package/dist/pipeline/gap-analysis.d.ts +1 -1
  286. package/dist/pipeline/gap-analysis.js +3 -1
  287. package/dist/pipeline/generate-configs.d.ts +2 -2
  288. package/dist/pipeline/generate-configs.js +15 -8
  289. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  290. package/dist/pipeline/grader-compare-runner.js +7 -1
  291. package/dist/pipeline/grader-comparison.d.ts +1 -1
  292. package/dist/pipeline/grader-comparison.js +1 -1
  293. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  294. package/dist/pipeline/grader-consistency-runner.js +7 -1
  295. package/dist/pipeline/grader-consistency.d.ts +1 -1
  296. package/dist/pipeline/grader-consistency.js +1 -1
  297. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  298. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  299. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  300. package/dist/pipeline/grader-sensitivity.js +1 -1
  301. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  302. package/dist/pipeline/grader-validate-runner.js +2 -2
  303. package/dist/pipeline/grader-validation.d.ts +1 -1
  304. package/dist/pipeline/grader-validation.js +1 -1
  305. package/dist/pipeline/map-request-to-config.js +15 -2
  306. package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
  307. package/dist/pipeline/mirror-repo-tasks.js +1 -1
  308. package/dist/pipeline/plan-format.d.ts +1 -1
  309. package/dist/pipeline/plan-format.js +1 -1
  310. package/dist/pipeline/plan.d.ts +1 -1
  311. package/dist/pipeline/plan.js +67 -29
  312. package/dist/pipeline/probe.d.ts +1 -1
  313. package/dist/pipeline/probe.js +1 -1
  314. package/dist/pipeline/readiness-report.d.ts +2 -2
  315. package/dist/pipeline/readiness-report.js +2 -2
  316. package/dist/pipeline/release-classification.d.ts +1 -1
  317. package/dist/pipeline/release-classification.js +1 -1
  318. package/dist/pipeline/release-report.d.ts +1 -1
  319. package/dist/pipeline/release-report.js +1 -1
  320. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  321. package/dist/pipeline/repo-eval-comment.js +1 -1
  322. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  323. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  324. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  325. package/dist/pipeline/resolve-mappings.js +44 -44
  326. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  327. package/dist/pipeline/retrieval-metrics.js +28 -20
  328. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  329. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  330. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  331. package/dist/pipeline/steps/compare-step.js +90 -0
  332. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  333. package/dist/pipeline/steps/eval-step.js +347 -0
  334. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  335. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  336. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  337. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  338. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  339. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  340. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  341. package/dist/pipeline/steps/publish-report-step.js +243 -0
  342. package/dist/pipeline/steps/report-step.d.ts +13 -0
  343. package/dist/pipeline/steps/report-step.js +56 -0
  344. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  345. package/dist/pipeline/steps/update-scores-step.js +42 -0
  346. package/dist/pipeline/targeted-loo.d.ts +1 -1
  347. package/dist/pipeline/targeted-loo.js +1 -1
  348. package/dist/pipeline/thresholds.d.ts +1 -1
  349. package/dist/pipeline/thresholds.js +1 -1
  350. package/dist/pipeline/validate.js +13 -0
  351. package/dist/report-store.d.ts +17 -0
  352. package/dist/report-store.js +24 -0
  353. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  354. package/dist/scripts/agent-behavior-report.js +315 -0
  355. package/dist/scripts/baseline.d.ts +43 -0
  356. package/dist/scripts/baseline.js +267 -0
  357. package/dist/scripts/calculate-scores.d.ts +166 -0
  358. package/dist/scripts/calculate-scores.js +1296 -0
  359. package/dist/scripts/compare.d.ts +22 -0
  360. package/dist/scripts/compare.js +334 -0
  361. package/dist/scripts/coverage-audit.d.ts +44 -0
  362. package/dist/scripts/coverage-audit.js +209 -0
  363. package/dist/scripts/debug-eval.d.ts +19 -0
  364. package/dist/scripts/debug-eval.js +73 -0
  365. package/dist/scripts/discovery-report.d.ts +58 -0
  366. package/dist/scripts/discovery-report.js +250 -0
  367. package/dist/scripts/fetch-docs.d.ts +35 -0
  368. package/dist/scripts/fetch-docs.js +472 -0
  369. package/dist/scripts/generate-configs.d.ts +66 -0
  370. package/dist/scripts/generate-configs.js +459 -0
  371. package/dist/scripts/grader-api.d.ts +27 -0
  372. package/dist/scripts/grader-api.js +206 -0
  373. package/dist/scripts/grader-compare.d.ts +22 -0
  374. package/dist/scripts/grader-compare.js +368 -0
  375. package/dist/scripts/grader-consistency.d.ts +20 -0
  376. package/dist/scripts/grader-consistency.js +313 -0
  377. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  378. package/dist/scripts/grader-sensitivity.js +354 -0
  379. package/dist/scripts/grader-validate.d.ts +19 -0
  380. package/dist/scripts/grader-validate.js +267 -0
  381. package/dist/scripts/measure-retrieval.d.ts +10 -0
  382. package/dist/scripts/measure-retrieval.js +145 -0
  383. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  384. package/dist/scripts/migrate-task-mode.js +1 -1
  385. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  386. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  387. package/dist/scripts/pipeline.d.ts +76 -0
  388. package/dist/scripts/pipeline.js +1031 -0
  389. package/dist/scripts/pr-comment.d.ts +10 -0
  390. package/dist/scripts/pr-comment.js +510 -0
  391. package/dist/scripts/readiness-report.d.ts +88 -0
  392. package/dist/scripts/readiness-report.js +342 -0
  393. package/dist/scripts/update-quality-scores.d.ts +15 -0
  394. package/dist/scripts/update-quality-scores.js +184 -0
  395. package/dist/scripts/validate-task-sources.d.ts +1 -1
  396. package/dist/scripts/validate-task-sources.js +1 -1
  397. package/dist/scripts/validate.d.ts +13 -0
  398. package/dist/scripts/validate.js +79 -0
  399. package/dist/scripts/webhook-server.d.ts +26 -0
  400. package/dist/scripts/webhook-server.js +147 -0
  401. package/dist/scripts/weekly-digest.d.ts +24 -0
  402. package/dist/scripts/weekly-digest.js +144 -0
  403. package/dist/sinks/format-slack.d.ts +64 -0
  404. package/dist/sinks/format-slack.js +306 -0
  405. package/dist/sinks/slack-sink.d.ts +27 -0
  406. package/dist/sinks/slack-sink.js +78 -0
  407. package/dist/sinks/types.d.ts +1 -1
  408. package/dist/sinks/types.js +1 -1
  409. package/dist/sinks/webhook-sink.d.ts +19 -0
  410. package/dist/sinks/webhook-sink.js +50 -0
  411. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  412. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  413. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  414. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  415. package/dist/tasks/literacy/functions.task.ts +70 -0
  416. package/dist/tasks/literacy/groq.task.ts +259 -0
  417. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  418. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  419. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  420. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  421. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  422. package/package.json +24 -24
  423. package/tasks/.expanded.agentic.yaml +280 -0
  424. package/tasks/.expanded.yaml +565 -0
  425. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  426. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  427. package/tasks/literacy/content-lake.task.ts +181 -0
  428. package/tasks/literacy/frameworks.task.ts +1 -0
  429. package/tasks/literacy/functions.task.ts +1 -0
  430. package/tasks/literacy/groq.task.ts +1 -0
  431. package/tasks/literacy/image-handling.task.ts +95 -0
  432. package/tasks/literacy/nextjs-live.task.ts +2 -1
  433. package/tasks/literacy/portable-text.task.ts +169 -0
  434. package/tasks/literacy/studio-setup.task.ts +5 -2
  435. package/tasks/literacy/visual-editing.task.ts +1 -0
  436. package/LICENSE +0 -21
  437. package/tasks/frameworks.yaml +0 -98
  438. package/tasks/functions.yaml +0 -51
  439. package/tasks/groq.yaml +0 -216
  440. package/tasks/nextjs-live.yaml +0 -62
  441. package/tasks/studio-setup.yaml +0 -111
  442. package/tasks/visual-editing.yaml +0 -120
@@ -0,0 +1,307 @@
1
+ /**
2
+ * MCPServerModeHandler — compilation rules for `mcp-server` evaluation mode.
3
+ *
4
+ * This is the first non-literacy mode handler, proving the compiler
5
+ * architecture works end-to-end. It translates MCP server task definitions
6
+ * into Promptfoo configuration with:
7
+ *
8
+ * - An MCP provider that wraps the server under test
9
+ * - Tool-call assertions compiled to Promptfoo `javascript` assertions
10
+ * - Server lifecycle management via Promptfoo provider hooks
11
+ * - Multi-turn conversation support via Promptfoo's `steps` syntax
12
+ *
13
+ * Promptfoo supports MCP servers as providers natively:
14
+ * ```yaml
15
+ * providers:
16
+ *   - id: mcp:./my-server
17
+ *     config:
18
+ *       command: node
19
+ *       args: [./dist/server.js]
20
+ *       env: { API_KEY: "..." }
21
+ * ```
22
+ *
23
+ * This handler assembles that config from AILF's `MCPServerTaskDefinition`.
24
+ *
25
+ * @see docs/exec-plans/architecture-overhaul/phase-3-mcp-server-mode.md
26
+ * @see packages/core/src/types/eval-mode-config.ts — MCPServerModeConfig
27
+ * @see packages/core/src/types/generalized-task.ts — MCPServerTaskDefinition
28
+ */
29
+ import { buildMCPAssertions, } from "./mcp-assertions.js";
30
+ // ---------------------------------------------------------------------------
31
+ // Canonical MCP server prompt templates
32
+ // ---------------------------------------------------------------------------
33
/**
 * Prompt templates owned by the MCP server mode handler, keyed by template id.
 *
 * The "mcp-server" template tells the model to work through the MCP tools it
 * is given rather than writing standalone code. `{{task}}` is its only
 * placeholder, as listed in `variables`.
 */
export const MCP_PROMPT_TEMPLATES = {
    "mcp-server": {
        id: "mcp-server",
        label: "MCP Server Tool Use",
        // One entry per prompt line, joined with "\n". The trailing empty
        // entry keeps the newline that ends the prompt.
        template: [
            "You are an AI assistant with access to an MCP (Model Context Protocol) server that provides tools for interacting with a Sanity content backend.",
            "",
            "## Task",
            "{{task}}",
            "",
            "## Instructions",
            "",
            "1. Use the available MCP tools to complete the task",
            "2. Call tools with the correct parameters as described in their schemas",
            "3. Interpret tool responses and use the results to accomplish the goal",
            "4. If a tool returns an error, explain the issue clearly",
            "5. Prefer using specific tools over broad queries when possible",
            "",
            "Complete the task using the MCP tools provided:",
            "",
        ].join("\n"),
        variables: ["task"],
    },
};
57
/**
 * Validate that an MCP task definition has all required fields.
 *
 * Checks performed:
 * - `id` and `title` must be truthy.
 * - When `serverConfig` is present, `stdio` transport needs `command`, and
 *   `sse` / `streamable-http` transports need `url`.
 * - Entries of `assertions` must not be `null`/`undefined`, and every
 *   `tool-called` assertion must carry a truthy `value` (the tool name).
 *
 * Problems are collected rather than thrown, so a single call reports every
 * issue it finds.
 *
 * @param task - The MCP task definition to check.
 * @returns A list of `{ field, message }` errors; empty when nothing is wrong.
 */
export function validateMCPTask(task) {
    const errors = [];
    if (!task.id) {
        errors.push({ field: "id", message: "Task ID is required" });
    }
    if (!task.title) {
        errors.push({ field: "title", message: "Task title is required" });
    }
    if (task.serverConfig) {
        const { transport, command, url } = task.serverConfig;
        if (transport === "stdio" && !command) {
            errors.push({
                field: "serverConfig.command",
                message: "Server command is required for stdio transport (e.g., 'node dist/server.js')",
            });
        }
        if ((transport === "sse" || transport === "streamable-http") && !url) {
            errors.push({
                field: "serverConfig.url",
                message: `Server URL is required for ${transport} transport`,
            });
        }
    }
    // Assertions should reference MCP-compatible types
    if (task.assertions) {
        let index = 0;
        for (const assertion of task.assertions) {
            const position = index++;
            // A null/undefined entry would make the property reads below
            // throw a TypeError; report it like any other validation problem.
            if (assertion == null) {
                errors.push({
                    field: "assertions",
                    message: `Assertion at index ${position} is null or undefined`,
                });
                continue;
            }
            if (assertion.type === "tool-called" &&
                !("value" in assertion && assertion.value)) {
                errors.push({
                    field: "assertions",
                    message: 'tool-called assertion requires a "value" specifying the tool name',
                });
            }
        }
    }
    return errors;
}
97
+ // ---------------------------------------------------------------------------
98
+ // Compilation
99
+ // ---------------------------------------------------------------------------
100
/**
 * Turn an MCP server task definition into the pieces of a Promptfoo config.
 *
 * Validation problems do not abort compilation: each one is recorded as a
 * warning string and the build carries on. The same `warnings` array is then
 * passed to the provider and test-case builders.
 *
 * @param task - The MCP server task definition to compile.
 * @param options - Optional compile options; `options.models` is handed to
 *   provider assembly (an empty list is used when it is absent).
 * @returns `{ providers, tests, prompts, warnings }`.
 */
export function compileMCPTask(task, options) {
    // Validation errors become warnings, prefixed with the task id.
    const warnings = validateMCPTask(task).map(({ field, message }) => `MCP task "${task.id}": ${field} — ${message}`);
    // Providers: one LLM provider per model, each carrying the MCP config.
    const registryModels = options?.models ?? [];
    const providers = buildMCPProvider(task, registryModels, warnings);
    // Prompts, then test cases.
    const prompts = buildMCPPrompts(task);
    const tests = buildMCPTestCases(task, options, warnings);
    return { providers, tests, prompts, warnings };
}
125
+ // ---------------------------------------------------------------------------
126
+ // Provider assembly
127
+ // ---------------------------------------------------------------------------
128
+ /** Default max tool rounds for MCP multi-turn execution */
129
+ const DEFAULT_MAX_TOOL_ROUNDS = 5;
130
+ /** Provider module location: a file:// path into the eval package's dist directory */
131
+ const MCP_PROVIDER_PATH = "file://dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js";
132
/**
 * Produce the custom MCP tool provider entries, one per model.
 *
 * Every entry points at `MCP_PROVIDER_PATH` and carries a config of the form
 *   { model, mcpServer, mcpTools?, maxToolRounds, ...modelConfig }
 * where `mcpTools` is only present when the task declares `capabilities`,
 * and `modelConfig` is spread last, so its keys win over the ones before it.
 *
 * Model selection:
 *   - `task.models` (non-empty) takes precedence; those entries use the model
 *     id as the label and get no per-model config.
 *   - otherwise the supplied registry `models` are used (`id`, `label`, `config`);
 *   - if that list is empty, a warning is pushed and a single hard-coded
 *     fallback provider is returned.
 *
 * @param task     MCP task definition.
 * @param models   Registry models available to this task.
 * @param warnings Mutable list that collects warning messages.
 * @returns Array of provider entries `{ id, label, config }`.
 */
function buildMCPProvider(task, models, warnings) {
  // Resolved once and shared by every provider entry.
  const serverConnection = buildMCPServerConfig(task, warnings);
  const toolList = task.capabilities ?? undefined;
  const roundLimit = task.maxToolRounds ?? DEFAULT_MAX_TOOL_ROUNDS;

  const toProvider = (modelId, label, modelConfig) => ({
    id: MCP_PROVIDER_PATH,
    label: `${label} + MCP`,
    config: {
      model: modelId,
      mcpServer: serverConnection,
      ...(toolList ? { mcpTools: toolList } : {}),
      maxToolRounds: roundLimit,
      ...(modelConfig ?? {}),
    },
  });

  // A task-level model list overrides whatever the registry offers.
  const taskLevelModels = task.models;
  if (taskLevelModels && taskLevelModels.length > 0) {
    return taskLevelModels.map((modelId) => toProvider(modelId, modelId, undefined));
  }

  if (models.length > 0) {
    return models.map((entry) => toProvider(entry.id, entry.label, entry.config));
  }

  // Nothing configured anywhere: warn and fall back to a single default model.
  warnings.push(`MCP task "${task.id}": no models available. Add "mcp-server" to a ` +
    "model's modes array in config/models.ts, or set models on the task.");
  return [
    toProvider("anthropic:messages:claude-sonnet-4-20250514", "Claude Sonnet 4", undefined),
  ];
}
177
+ /**
178
+ * Build the MCP server connection config for the custom provider.
179
+ *
180
+ * Shape: { url?, command?, name?, auth? }
181
+ * The custom mcp-tool-provider.ts uses this to connect to the MCP server.
182
+ */
183
+ function buildMCPServerConfig(task, warnings) {
184
+ const config = task.serverConfig;
185
+ if (!config) {
186
+ warnings.push(`MCP task "${task.id}": no serverConfig — using placeholder. ` +
187
+ "Set serverConfig.command or serverConfig.url to point to your MCP server.");
188
+ return { name: task.id };
189
+ }
190
+ const serverConfig = { name: task.id };
191
+ if (config.transport === "stdio") {
192
+ serverConfig.command = config.command;
193
+ }
194
+ else {
195
+ serverConfig.url = config.url;
196
+ }
197
+ // Auth config
198
+ if (config.auth) {
199
+ serverConfig.auth = config.auth;
200
+ }
201
+ else if (config.env) {
202
+ const tokenKey = Object.keys(config.env).find((k) => /token|auth|key/i.test(k));
203
+ if (tokenKey) {
204
+ const val = config.env[tokenKey];
205
+ let envVar = val;
206
+ if (val.startsWith("$env(") && val.endsWith(")")) {
207
+ envVar = val.slice(5, -1);
208
+ }
209
+ if (!envVar || !/^[A-Za-z_][A-Za-z0-9_]*$/.test(envVar)) {
210
+ warnings.push(`MCP task: env var name "${envVar}" from "${val}" is not a valid ` +
211
+ "identifier — skipping auth config");
212
+ }
213
+ else {
214
+ serverConfig.auth = {
215
+ type: "bearer",
216
+ token: `{{env.${envVar}}}`,
217
+ };
218
+ }
219
+ }
220
+ }
221
+ return serverConfig;
222
+ }
223
+ // ---------------------------------------------------------------------------
224
+ // Prompt assembly
225
+ // ---------------------------------------------------------------------------
226
/**
 * Build the prompt list for an MCP task. MCP mode always yields exactly one
 * prompt.
 *
 * The prompt text is the first non-null/undefined value among
 * `task.prompt.text`, `task.prompt.vars.task` and `task.description`;
 * if none is set, a generic "Test MCP server: <title>" string is used.
 * The chosen value is stringified.
 *
 * @param task MCP task definition.
 * @returns A single-element array `[{ id, label, raw }]`.
 */
function buildMCPPrompts(task) {
  const candidates = [task.prompt?.text, task.prompt?.vars?.task, task.description];
  // Only null/undefined are skipped; an empty string is kept as-is.
  const chosen = candidates.find((candidate) => candidate !== null && candidate !== undefined);
  const text = chosen ?? `Test MCP server: ${task.title}`;

  const prompt = {
    id: "mcp-test",
    label: `MCP: ${task.title}`,
    raw: String(text),
  };
  return [prompt];
}
240
+ // ---------------------------------------------------------------------------
241
+ // Test case assembly
242
+ // ---------------------------------------------------------------------------
243
/**
 * Build the Promptfoo test cases for an MCP task.
 *
 * Always emits one primary test case. When `task.multiTurn.turns` is a
 * non-empty list, a second "[multi-turn]" case is added whose vars also
 * carry the turns under `__multiTurn`. Both cases share the same compiled
 * assertions; the `assert` key is omitted when there are none.
 *
 * Task assertions, when present, are compiled through `buildMCPAssertions`
 * and any warnings it reports are appended to `warnings`.
 *
 * @param task     MCP task definition.
 * @param options  Optional compile options (`graderProvider` is forwarded).
 * @param warnings Mutable list that collects warning messages.
 * @returns Array of test case objects.
 */
function buildMCPTestCases(task, options, warnings) {
  const compiledAssertions = [];
  if (task.assertions) {
    const context = {
      capabilities: task.capabilities ?? [],
      graderProvider: options?.graderProvider,
      taskId: task.id,
    };
    const outcome = buildMCPAssertions(task.assertions, context);
    compiledAssertions.push(...outcome.assertions);
    warnings.push(...outcome.warnings);
  }
  const assertPart = compiledAssertions.length > 0 ? { assert: compiledAssertions } : {};

  // Explicit prompt vars are spread last, so they override the default `task` var.
  const promptVars = task.prompt?.vars;
  const baseVars = {
    task: promptVars?.task ?? task.description ?? `Test: ${task.title}`,
    ...(promptVars ?? {}),
  };

  const suite = [
    {
      description: `${task.id} — ${task.title}`,
      vars: baseVars,
      ...assertPart,
    },
  ];

  const turns = task.multiTurn?.turns;
  if (turns && turns.length > 0) {
    suite.push({
      description: `${task.id} — ${task.title} [multi-turn]`,
      vars: { ...baseVars, __multiTurn: turns },
      ...assertPart,
    });
  }
  return suite;
}
284
+ // ---------------------------------------------------------------------------
285
+ // ModeHandler adapter
286
+ // ---------------------------------------------------------------------------
287
/**
 * ModeHandler-conformant adapter for the "mcp-server" evaluation mode.
 *
 * - `getPrompts()` returns the MCP prompt templates.
 * - `compileTask(task, ctx)` rejects any task whose `mode` is not
 *   "mcp-server" (throws `Error`), then delegates to `compileMCPTask`
 *   with the grader provider and models taken from `ctx`.
 */
export const handler = {
  getPrompts() {
    return MCP_PROMPT_TEMPLATES;
  },
  compileTask(task, ctx) {
    const isMCPTask = "mode" in task && task.mode === "mcp-server";
    if (!isMCPTask) {
      throw new Error(`MCP server handler received task with mode "${task.mode ?? "undefined"}" — expected "mcp-server"`);
    }
    const compileOptions = {
      graderProvider: ctx.graderProvider,
      models: ctx.models,
    };
    const { providers, tests, prompts, warnings } = compileMCPTask(task, compileOptions);
    return { providers, tests, prompts, warnings };
  },
};
@@ -43,19 +43,29 @@ const BACKENDS = {
43
43
  *
44
44
  * Model IDs follow the pattern `provider:type:model-name` (e.g.,
45
45
  * `anthropic:messages:claude-opus-4-6`). The first segment determines
46
- * which backend handles the tool loop.
46
+ * which backend handles the tool loop. For OpenAI, the second segment
47
+ * determines the API variant (`chat` → Chat Completions, `responses` →
48
+ * Responses API).
47
49
  */
48
50
  function resolveBackend(modelId) {
49
- const prefix = modelId.split(":")[0];
51
+ const parts = modelId.split(":");
52
+ const prefix = parts[0];
50
53
  const backend = BACKENDS[prefix];
51
54
  if (!backend) {
52
55
  const supported = Object.keys(BACKENDS).join(", ");
53
56
  throw new Error(`No backend for model "${modelId}". Supported prefixes: ${supported}`);
54
57
  }
55
58
  // Extract the model name for the API (e.g., "claude-opus-4-6" from "anthropic:messages:claude-opus-4-6")
56
- const parts = modelId.split(":");
57
59
  const modelName = parts.length > 2 ? parts.slice(2).join(":") : parts[parts.length - 1];
58
- return { backend, modelName };
60
+ // For OpenAI, extract the API variant from the second segment
61
+ let apiVariant;
62
+ if (prefix === "openai" && parts.length > 2) {
63
+ const variant = parts[1];
64
+ if (variant === "responses" || variant === "chat") {
65
+ apiVariant = variant;
66
+ }
67
+ }
68
+ return { backend, modelName, apiVariant };
59
69
  }
60
70
  // ---------------------------------------------------------------------------
61
71
  // Helpers
@@ -100,10 +110,12 @@ export default class MCPToolProvider {
100
110
  const modelId = this.config.model || "anthropic:messages:claude-opus-4-6";
101
111
  let backend;
102
112
  let modelName;
113
+ let apiVariant;
103
114
  try {
104
115
  const resolved = resolveBackend(modelId);
105
116
  backend = resolved.backend;
106
117
  modelName = resolved.modelName;
118
+ apiVariant = resolved.apiVariant;
107
119
  }
108
120
  catch (err) {
109
121
  return {
@@ -152,8 +164,13 @@ export default class MCPToolProvider {
152
164
  maxToolRounds: this.config.maxToolRounds || 5,
153
165
  model: modelName,
154
166
  temperature: this.config.temperature ?? 0.2,
155
- maxTokens: this.config.max_tokens || 4096,
167
+ maxTokens: this.config.max_output_tokens ||
168
+ this.config.max_completion_tokens ||
169
+ this.config.max_tokens ||
170
+ 4096,
156
171
  apiKey,
172
+ apiVariant,
173
+ providerConfig: this.config,
157
174
  });
158
175
  return {
159
176
  cost: 0,
@@ -33,10 +33,16 @@ export async function connectMCP(serverConfig) {
33
33
  else if (resolvedConfig.url) {
34
34
  const { StreamableHTTPClientTransport } = await import("@modelcontextprotocol/sdk/client/streamableHttp.js");
35
35
  const headers = {};
36
+ // Auth-derived headers (structured auth config)
36
37
  const auth = resolvedConfig.auth;
37
38
  if (auth?.type === "bearer" && auth.token) {
38
39
  headers["Authorization"] = `Bearer ${auth.token}`;
39
40
  }
41
+ // Explicit headers override auth-derived ones
42
+ const customHeaders = resolvedConfig.headers;
43
+ if (customHeaders) {
44
+ Object.assign(headers, customHeaders);
45
+ }
40
46
  const transport = new StreamableHTTPClientTransport(new URL(String(resolvedConfig.url)), { requestInit: { headers } });
41
47
  await client.connect(transport);
42
48
  closeTransport = () => transport.close();
@@ -1,14 +1,19 @@
1
1
  /**
2
2
  * OpenAI multi-turn tool execution loop.
3
3
  *
4
- * Placeholder for future implementation. Will use the OpenAI Chat
5
- * Completions or Responses API with function calling to execute MCP
6
- * tools in a multi-turn loop.
4
+ * Supports two OpenAI API surfaces:
5
+ * - **Chat Completions** (`/v1/chat/completions`) — used by `openai:chat:*` models
6
+ * - **Responses** (`/v1/responses`) — used by `openai:responses:*` models (GPT-5.x)
7
+ *
8
+ * Both follow the same loop pattern: send prompt → model calls tools → execute
9
+ * via MCP → feed results back → repeat until final text or maxToolRounds.
7
10
  */
8
11
  import type { ToolLoopConfig, ToolLoopResult } from "./types.js";
9
12
  /**
10
13
  * Run a multi-turn tool loop using the OpenAI API.
11
14
  *
12
- * @throws {Error} Not yet implemented
15
+ * Routes to Chat Completions or Responses API based on `config.apiVariant`:
16
+ * - `"responses"` → Responses API (`/v1/responses`)
17
+ * - `"chat"` or undefined → Chat Completions API (`/v1/chat/completions`)
13
18
  */
14
- export declare function runOpenAIToolLoop(_config: ToolLoopConfig): Promise<ToolLoopResult>;
19
+ export declare function runOpenAIToolLoop(config: ToolLoopConfig): Promise<ToolLoopResult>;