@sanity/ailf 0.4.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290)
  1. package/config/features.ts +23 -0
  2. package/config/models.ts +83 -0
  3. package/config/prompts.ts +16 -0
  4. package/config/rubrics.ts +225 -0
  5. package/config/schedules.ts +47 -0
  6. package/config/sinks.ts +37 -0
  7. package/config/sources.ts +21 -0
  8. package/config/thresholds.ts +61 -0
  9. package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
  10. package/dist/_vendor/ailf-core/config-helpers.js +150 -0
  11. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  12. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  13. package/dist/_vendor/ailf-core/examples/index.d.ts +10 -10
  14. package/dist/_vendor/ailf-core/examples/index.js +10 -10
  15. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  16. package/dist/_vendor/ailf-core/index.js +5 -0
  17. package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
  18. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  19. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  20. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  21. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  22. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  23. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  24. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
  25. package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
  26. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
  27. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
  28. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +32 -31
  29. package/dist/_vendor/ailf-core/schemas/pipeline.js +52 -12
  30. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  31. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  32. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  33. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  34. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  35. package/dist/_vendor/ailf-core/services/index.js +2 -1
  36. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  37. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  38. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  39. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  40. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  41. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  42. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  43. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  44. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  46. package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
  47. package/dist/_vendor/ailf-core/types/index.js +8 -1
  48. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
  49. package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
  50. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  51. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  52. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  53. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  54. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  55. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  56. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  57. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  58. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  59. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  60. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  61. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  62. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  63. package/dist/_vendor/ailf-shared/index.js +0 -1
  64. package/dist/adapters/api-client/build-request.js +14 -13
  65. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  66. package/dist/adapters/config-sources/file-config-adapter.js +38 -12
  67. package/dist/adapters/config-sources/index.d.ts +2 -0
  68. package/dist/adapters/config-sources/index.js +1 -0
  69. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  70. package/dist/adapters/config-sources/ts-config-loader.js +133 -0
  71. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  72. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  73. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  74. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  75. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  76. package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
  77. package/dist/adapters/task-sources/index.d.ts +1 -0
  78. package/dist/adapters/task-sources/index.js +1 -0
  79. package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
  80. package/dist/adapters/task-sources/repo-task-source.js +69 -16
  81. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  82. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  83. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  84. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  85. package/dist/cli.js +0 -2
  86. package/dist/commands/baseline.js +4 -1
  87. package/dist/commands/calculate-scores.js +1 -1
  88. package/dist/commands/coverage-audit.js +7 -1
  89. package/dist/commands/explain-handler.js +25 -23
  90. package/dist/commands/fetch-docs.js +3 -2
  91. package/dist/commands/generate-configs.js +1 -1
  92. package/dist/commands/interactive.js +11 -7
  93. package/dist/commands/pipeline-action.d.ts +2 -0
  94. package/dist/commands/pipeline-action.js +16 -6
  95. package/dist/commands/pipeline.d.ts +1 -0
  96. package/dist/commands/pipeline.js +4 -2
  97. package/dist/commands/pr-comment.js +1 -1
  98. package/dist/commands/publish.js +2 -2
  99. package/dist/commands/readiness-report.js +13 -6
  100. package/dist/composition-root.d.ts +1 -1
  101. package/dist/composition-root.js +67 -4
  102. package/dist/orchestration/build-app-context.js +1 -0
  103. package/dist/orchestration/build-step-sequence.js +24 -6
  104. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  105. package/dist/orchestration/steps/fetch-docs-step.js +6 -4
  106. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  107. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  108. package/dist/orchestration/steps/generate-configs-step.js +245 -51
  109. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  110. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  111. package/dist/orchestration/steps/readiness-step.js +5 -6
  112. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  113. package/dist/orchestration/steps/run-eval-step.js +8 -7
  114. package/dist/pipeline/cache.d.ts +1 -1
  115. package/dist/pipeline/cache.js +36 -8
  116. package/dist/pipeline/calculate-scores.d.ts +5 -7
  117. package/dist/pipeline/calculate-scores.js +74 -153
  118. package/dist/pipeline/checks.js +2 -2
  119. package/dist/pipeline/compare.js +8 -8
  120. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  121. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  122. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  123. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  124. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  125. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  126. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  127. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  128. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  129. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
  130. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  131. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  132. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  133. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  134. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  135. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
  136. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  137. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  138. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  139. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  140. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  141. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  142. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  143. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  144. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  145. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  146. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  147. package/dist/pipeline/compiler/config-loader.js +111 -0
  148. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  149. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  150. package/dist/pipeline/compiler/hash.d.ts +11 -0
  151. package/dist/pipeline/compiler/hash.js +18 -0
  152. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  153. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  154. package/dist/pipeline/compiler/index.d.ts +29 -0
  155. package/dist/pipeline/compiler/index.js +45 -0
  156. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  157. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  158. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  159. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  160. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  161. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  162. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  163. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  164. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
  165. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
  166. package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
  167. package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
  168. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
  169. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
  170. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
  171. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
  172. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
  173. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
  174. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
  175. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
  176. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  177. package/dist/pipeline/compiler/presets/index.js +8 -0
  178. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
  179. package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
  180. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  181. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  182. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  183. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  184. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  185. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  186. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  187. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  188. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  189. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  190. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  191. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  192. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  193. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  194. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  195. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  196. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  197. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  198. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  199. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  200. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  201. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  202. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  203. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  204. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  205. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  206. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  207. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  208. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  209. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  210. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  211. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  212. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  213. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  214. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  215. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  216. package/dist/pipeline/coverage-audit.d.ts +15 -5
  217. package/dist/pipeline/coverage-audit.js +41 -22
  218. package/dist/pipeline/eval-constants.d.ts +16 -6
  219. package/dist/pipeline/eval-constants.js +25 -4
  220. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  221. package/dist/pipeline/eval-fingerprint.js +8 -9
  222. package/dist/pipeline/expand-tasks.d.ts +23 -14
  223. package/dist/pipeline/expand-tasks.js +37 -31
  224. package/dist/pipeline/gap-analysis.d.ts +1 -1
  225. package/dist/pipeline/gap-analysis.js +2 -2
  226. package/dist/pipeline/generate-configs.d.ts +22 -4
  227. package/dist/pipeline/generate-configs.js +53 -24
  228. package/dist/pipeline/grader-api.d.ts +3 -3
  229. package/dist/pipeline/grader-api.js +5 -12
  230. package/dist/pipeline/grader-compare-runner.js +20 -27
  231. package/dist/pipeline/grader-comparison.d.ts +4 -8
  232. package/dist/pipeline/grader-comparison.js +11 -17
  233. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  234. package/dist/pipeline/grader-consistency-runner.js +18 -21
  235. package/dist/pipeline/grader-consistency.d.ts +6 -10
  236. package/dist/pipeline/grader-consistency.js +13 -32
  237. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  238. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  239. package/dist/pipeline/grader-sensitivity.js +10 -10
  240. package/dist/pipeline/grader-validate-runner.js +7 -5
  241. package/dist/pipeline/grader-validation.d.ts +2 -6
  242. package/dist/pipeline/grader-validation.js +14 -22
  243. package/dist/pipeline/map-request-to-config.js +6 -1
  244. package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
  245. package/dist/pipeline/mirror-repo-tasks.js +16 -15
  246. package/dist/pipeline/normalize-mode.d.ts +49 -0
  247. package/dist/pipeline/normalize-mode.js +64 -0
  248. package/dist/pipeline/plan.d.ts +5 -2
  249. package/dist/pipeline/plan.js +134 -78
  250. package/dist/pipeline/pr-comment.js +2 -0
  251. package/dist/pipeline/profile-resolution.d.ts +47 -0
  252. package/dist/pipeline/profile-resolution.js +91 -0
  253. package/dist/pipeline/provenance.d.ts +2 -2
  254. package/dist/pipeline/provenance.js +12 -17
  255. package/dist/pipeline/release-report.js +4 -4
  256. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  257. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  258. package/dist/pipeline/rubric-loader.d.ts +20 -0
  259. package/dist/pipeline/rubric-loader.js +37 -0
  260. package/dist/pipeline/validate.d.ts +4 -4
  261. package/dist/pipeline/validate.js +64 -53
  262. package/dist/schedules/loader.js +18 -8
  263. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  264. package/dist/scripts/migrate-task-mode.js +85 -0
  265. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  266. package/dist/scripts/validate-task-sources.d.ts +1 -1
  267. package/dist/scripts/validate-task-sources.js +15 -15
  268. package/dist/sinks/loader.js +5 -7
  269. package/dist/sources.d.ts +7 -7
  270. package/dist/sources.js +22 -24
  271. package/dist/webhook/dispatch.js +2 -1
  272. package/package.json +6 -3
  273. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  274. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  275. package/tasks/literacy/frameworks.task.ts +128 -0
  276. package/tasks/literacy/functions.task.ts +69 -0
  277. package/tasks/literacy/groq.task.ts +258 -0
  278. package/tasks/literacy/nextjs-live.task.ts +75 -0
  279. package/tasks/literacy/studio-setup.task.ts +131 -0
  280. package/tasks/literacy/visual-editing.task.ts +146 -0
  281. package/config/features.yaml +0 -116
  282. package/config/models.yaml +0 -116
  283. package/config/prompts.yaml +0 -75
  284. package/config/rubrics.yaml +0 -62
  285. package/config/schedules.yaml +0 -43
  286. package/config/sinks.yaml +0 -54
  287. package/config/sources.yaml +0 -51
  288. package/config/thresholds.yaml +0 -49
  289. package/dist/agent-observer/test-imports.d.ts +0 -7
  290. package/dist/agent-observer/test-imports.js +0 -185
@@ -0,0 +1,355 @@
1
+ /**
2
+ * mcp-server-handler.test.ts — Tests for MCP server mode compilation.
3
+ *
4
+ * Tests the full MCP compilation pipeline: task validation, provider
5
+ * assembly, assertion mapping, test case generation, and end-to-end
6
+ * compilation of example tasks.
7
+ *
8
+ * Run: npx tsx --test src/pipeline/compiler/__tests__/mcp-server-handler.test.ts
9
+ */
10
+ import assert from "node:assert/strict";
11
+ import { describe, it } from "node:test";
12
+ import { LiteracyVariant } from "../../normalize-mode.js";
13
+ import { compileMCPTask, handler as mcpHandler, MCP_PROMPT_TEMPLATES, validateMCPTask, } from "../mode-handlers/mcp-server-handler.js";
14
+ import { buildMCPAssertions } from "../mode-handlers/mcp-assertions.js";
15
+ import { allMCPExampleTasks, createAndPublishTask, inspectSchemaTask, queryDocumentsTask, semanticSearchTask, stdioServerTask, } from "../mode-handlers/__fixtures__/mcp-example-tasks.js";
16
+ // ---------------------------------------------------------------------------
17
+ // Helpers
18
+ // ---------------------------------------------------------------------------
19
+ function makeMinimalMCPTask(overrides) {
20
+ return {
21
+ mode: "mcp-server",
22
+ id: "test-mcp-task",
23
+ title: "Test MCP Task",
24
+ description: "A test MCP server evaluation task",
25
+ area: "mcp-server",
26
+ ...overrides,
27
+ };
28
+ }
29
+ // ---------------------------------------------------------------------------
30
+ // handler.getPrompts() — prompt template ownership
31
+ // ---------------------------------------------------------------------------
32
+ describe("MCPServerHandler.getPrompts", () => {
33
+ it("returns prompt templates", () => {
34
+ const prompts = mcpHandler.getPrompts();
35
+ assert.ok(prompts, "getPrompts() should return a record");
36
+ assert.ok(Object.keys(prompts).length > 0, "should return at least one template");
37
+ });
38
+ it("returns templates keyed by MCP-specific IDs (not literacy names)", () => {
39
+ const prompts = mcpHandler.getPrompts();
40
+ const keys = Object.keys(prompts);
41
+ // Must not use literacy template names
42
+ assert.ok(!keys.includes("with-docs"), "should not use literacy key 'with-docs'");
43
+ assert.ok(!keys.includes("without-docs"), "should not use literacy key 'without-docs'");
44
+ assert.ok(!keys.includes(LiteracyVariant.AGENTIC), "should not use literacy key 'agentic'");
45
+ // Must have MCP-appropriate key(s)
46
+ assert.ok(keys.includes("mcp-server"), "should include 'mcp-server' template");
47
+ });
48
+ it("mcp-server template instructs model to use MCP tools", () => {
49
+ const prompts = mcpHandler.getPrompts();
50
+ const template = prompts["mcp-server"];
51
+ assert.ok(template, "mcp-server template should exist");
52
+ assert.ok(template.template.includes("{{task}}"), "should include {{task}} placeholder");
53
+ // Should reference MCP tools / tool usage
54
+ assert.ok(/tool/i.test(template.template), "template should mention tools (MCP-appropriate content)");
55
+ });
56
+ it("template has correct PromptTemplate shape", () => {
57
+ const prompts = mcpHandler.getPrompts();
58
+ const template = prompts["mcp-server"];
59
+ assert.equal(template.id, "mcp-server");
60
+ assert.ok(template.label, "should have a human-readable label");
61
+ assert.ok(template.template, "should have a template string");
62
+ assert.ok(Array.isArray(template.variables), "should declare variables");
63
+ assert.ok(template.variables.includes("task"), "variables should include 'task'");
64
+ });
65
+ it("exported MCP_PROMPT_TEMPLATES matches handler.getPrompts()", () => {
66
+ const fromHandler = mcpHandler.getPrompts();
67
+ assert.deepEqual(fromHandler, MCP_PROMPT_TEMPLATES);
68
+ });
69
+ });
70
+ // ---------------------------------------------------------------------------
71
+ // validateMCPTask
72
+ // ---------------------------------------------------------------------------
73
+ describe("validateMCPTask", () => {
74
+ it("passes for a valid minimal task", () => {
75
+ const errors = validateMCPTask(makeMinimalMCPTask());
76
+ assert.equal(errors.length, 0);
77
+ });
78
+ it("errors on missing ID", () => {
79
+ const errors = validateMCPTask(makeMinimalMCPTask({ id: "" }));
80
+ assert.ok(errors.some((e) => e.field === "id"));
81
+ });
82
+ it("errors on missing title", () => {
83
+ const errors = validateMCPTask(makeMinimalMCPTask({ title: "" }));
84
+ assert.ok(errors.some((e) => e.field === "title"));
85
+ });
86
+ it("errors on stdio transport without command", () => {
87
+ const errors = validateMCPTask(makeMinimalMCPTask({
88
+ serverConfig: { transport: "stdio" },
89
+ }));
90
+ assert.ok(errors.some((e) => e.field === "serverConfig.command"));
91
+ });
92
+ it("errors on sse transport without url", () => {
93
+ const errors = validateMCPTask(makeMinimalMCPTask({
94
+ serverConfig: { transport: "sse" },
95
+ }));
96
+ assert.ok(errors.some((e) => e.field === "serverConfig.url"));
97
+ });
98
+ it("passes for valid stdio config", () => {
99
+ const errors = validateMCPTask(makeMinimalMCPTask({
100
+ serverConfig: {
101
+ transport: "stdio",
102
+ command: "node dist/server.js",
103
+ },
104
+ }));
105
+ assert.equal(errors.length, 0);
106
+ });
107
+ it("passes for valid sse config", () => {
108
+ const errors = validateMCPTask(makeMinimalMCPTask({
109
+ serverConfig: {
110
+ transport: "sse",
111
+ url: "http://localhost:3000/sse",
112
+ },
113
+ }));
114
+ assert.equal(errors.length, 0);
115
+ });
116
+ it("errors on tool-called without value", () => {
117
+ const errors = validateMCPTask(makeMinimalMCPTask({
118
+ assertions: [{ type: "tool-called" }],
119
+ }));
120
+ assert.ok(errors.some((e) => e.field === "assertions"));
121
+ });
122
+ });
123
+ // ---------------------------------------------------------------------------
124
+ // compileMCPTask
125
+ // ---------------------------------------------------------------------------
126
+ describe("compileMCPTask", () => {
127
+ it("produces provider, tests, and prompts", () => {
128
+ const result = compileMCPTask(makeMinimalMCPTask());
129
+ assert.ok(result.providers.length > 0, "Should produce providers");
130
+ assert.ok(result.tests.length > 0, "Should produce test cases");
131
+ assert.ok(result.prompts.length > 0, "Should produce prompts");
132
+ });
133
+ it("builds Promptfoo-native MCP provider for stdio", () => {
134
+ const result = compileMCPTask(makeMinimalMCPTask({
135
+ serverConfig: {
136
+ transport: "stdio",
137
+ command: "node dist/server.js --flag",
138
+ },
139
+ }));
140
+ assert.equal(result.providers.length, 1);
141
+ assert.equal(result.providers[0].id, "mcp");
142
+ const config = result.providers[0].config;
143
+ assert.equal(config.enabled, true);
144
+ const server = config.server;
145
+ assert.equal(server.command, "node");
146
+ assert.deepEqual(server.args, ["dist/server.js", "--flag"]);
147
+ });
148
+ it("builds Promptfoo-native MCP provider for URL-based transport", () => {
149
+ const result = compileMCPTask(makeMinimalMCPTask({
150
+ serverConfig: {
151
+ transport: "sse",
152
+ url: "http://localhost:3000/sse",
153
+ },
154
+ }));
155
+ assert.equal(result.providers[0].id, "mcp");
156
+ const config = result.providers[0].config;
157
+ const server = config.server;
158
+ assert.equal(server.url, "http://localhost:3000/sse");
159
+ });
160
+ it("maps auth config to Promptfoo provider", () => {
161
+ const result = compileMCPTask(makeMinimalMCPTask({
162
+ serverConfig: {
163
+ transport: "streamable-http",
164
+ url: "https://mcp.example.com",
165
+ auth: {
166
+ type: "bearer",
167
+ token: "{{env.MY_TOKEN}}",
168
+ },
169
+ },
170
+ }));
171
+ const config = result.providers[0].config;
172
+ const server = config.server;
173
+ assert.deepEqual(server.auth, {
174
+ type: "bearer",
175
+ token: "{{env.MY_TOKEN}}",
176
+ });
177
+ });
178
+ it("maps capabilities to Promptfoo tools filter", () => {
179
+ const result = compileMCPTask(makeMinimalMCPTask({
180
+ capabilities: ["query_documents", "get_schema"],
181
+ serverConfig: {
182
+ transport: "streamable-http",
183
+ url: "https://mcp.example.com",
184
+ },
185
+ }));
186
+ const config = result.providers[0].config;
187
+ assert.deepEqual(config.tools, ["query_documents", "get_schema"]);
188
+ });
189
+ it("uses task description as prompt text", () => {
190
+ const result = compileMCPTask(makeMinimalMCPTask({
191
+ description: "Test the getDocument tool",
192
+ }));
193
+ assert.equal(result.prompts[0].raw, "Test the getDocument tool");
194
+ });
195
+ it("prefers prompt.text over description", () => {
196
+ const result = compileMCPTask(makeMinimalMCPTask({
197
+ description: "Description",
198
+ prompt: { text: "Custom prompt text" },
199
+ }));
200
+ assert.equal(result.prompts[0].raw, "Custom prompt text");
201
+ });
202
+ it("includes task vars in test case", () => {
203
+ const result = compileMCPTask(makeMinimalMCPTask({
204
+ prompt: {
205
+ vars: { task: "Do the thing", extra: "value" },
206
+ },
207
+ }));
208
+ assert.equal(result.tests[0].vars.task, "Do the thing");
209
+ assert.equal(result.tests[0].vars.extra, "value");
210
+ });
211
+ it("creates multi-turn test case when multiTurn is defined", () => {
212
+ const result = compileMCPTask(makeMinimalMCPTask({
213
+ multiTurn: {
214
+ turns: [
215
+ { role: "user", content: "Hello" },
216
+ { role: "assistant", content: "Hi" },
217
+ ],
218
+ },
219
+ }));
220
+ // Primary + multi-turn test cases
221
+ assert.equal(result.tests.length, 2);
222
+ assert.ok(result.tests[1].description.includes("[multi-turn]"));
223
+ });
224
+ it("warns when serverConfig is missing", () => {
225
+ const result = compileMCPTask(makeMinimalMCPTask());
226
+ assert.ok(result.warnings.some((w) => w.includes("no serverConfig")));
227
+ });
228
+ it("sets grader provider on LLM assertions", () => {
229
+ const result = compileMCPTask(makeMinimalMCPTask({
230
+ assertions: [
231
+ {
232
+ type: "llm-rubric",
233
+ value: "Check quality",
234
+ },
235
+ ],
236
+ }), { graderProvider: "openai:chat:gpt-5" });
237
+ const llmAssert = result.tests[0].assert?.find((a) => a.type === "llm-rubric");
238
+ assert.ok(llmAssert);
239
+ assert.equal(llmAssert.provider, "openai:chat:gpt-5");
240
+ });
241
+ });
242
+ // ---------------------------------------------------------------------------
243
+ // buildMCPAssertions
244
+ // ---------------------------------------------------------------------------
245
+ describe("buildMCPAssertions", () => {
246
+ const ctx = { taskId: "test", capabilities: [], graderProvider: undefined };
247
+ it("maps tool-called to javascript assertion", () => {
248
+ const { assertions } = buildMCPAssertions([{ type: "tool-called", value: "getDocument" }], ctx);
249
+ assert.equal(assertions.length, 1);
250
+ assert.equal(assertions[0].type, "javascript");
251
+ assert.ok(assertions[0].value.includes("getDocument"), "Should reference tool name");
252
+ });
253
+ it("maps tool-input-matches to javascript assertion", () => {
254
+ const { assertions } = buildMCPAssertions([{ type: "tool-input-matches", value: { id: "doc-123" } }], ctx);
255
+ assert.equal(assertions.length, 1);
256
+ assert.equal(assertions[0].type, "javascript");
257
+ assert.ok(assertions[0].value.includes("doc-123"));
258
+ });
259
+ it("maps tool-output-matches to javascript assertion", () => {
260
+ const { assertions } = buildMCPAssertions([{ type: "tool-output-matches", value: { title: "Hello" } }], ctx);
261
+ assert.equal(assertions.length, 1);
262
+ assert.equal(assertions[0].type, "javascript");
263
+ assert.ok(assertions[0].value.includes("Hello"));
264
+ });
265
+ it("maps error-returned to javascript assertion", () => {
266
+ const { assertions } = buildMCPAssertions([{ type: "error-returned", value: { code: -32602 } }], ctx);
267
+ assert.equal(assertions.length, 1);
268
+ assert.equal(assertions[0].type, "javascript");
269
+ assert.ok(assertions[0].value.includes("-32602"));
270
+ });
271
+ it("maps capability-available to javascript assertion", () => {
272
+ const { assertions } = buildMCPAssertions([{ type: "capability-available", value: "tools/list" }], ctx);
273
+ assert.equal(assertions.length, 1);
274
+ assert.equal(assertions[0].type, "javascript");
275
+ assert.ok(assertions[0].value.includes("tools/list"));
276
+ });
277
+ it("passes through standard assertion types", () => {
278
+ const { assertions } = buildMCPAssertions([{ type: "contains", value: "result" }, { type: "is-json" }], ctx);
279
+ assert.equal(assertions.length, 2);
280
+ assert.equal(assertions[0].type, "contains");
281
+ assert.equal(assertions[1].type, "is-json");
282
+ });
283
+ it("preserves assertion weights", () => {
284
+ const { assertions } = buildMCPAssertions([{ type: "tool-called", value: "test", weight: 0.5 }], ctx);
285
+ assert.equal(assertions[0].weight, 0.5);
286
+ });
287
+ it("warns on unknown assertion type", () => {
288
+ const { warnings } = buildMCPAssertions([{ type: "unknown-type", value: "x" }], ctx);
289
+ assert.ok(warnings.some((w) => w.includes("unknown")));
290
+ });
291
+ });
292
+ // ---------------------------------------------------------------------------
293
+ // Example task compilation (end-to-end)
294
+ // ---------------------------------------------------------------------------
295
+ describe("example MCP tasks — end-to-end compilation", () => {
296
+ it("compiles all example tasks without errors", () => {
297
+ for (const task of allMCPExampleTasks) {
298
+ const result = compileMCPTask(task);
299
+ assert.ok(result.providers.length > 0, `${task.id}: should produce providers`);
300
+ assert.ok(result.tests.length > 0, `${task.id}: should produce test cases`);
301
+ assert.ok(result.prompts.length > 0, `${task.id}: should produce prompts`);
302
+ }
303
+ });
304
+ it("query task has tool-called + contains + llm-rubric assertions", () => {
305
+ const result = compileMCPTask(queryDocumentsTask);
306
+ const asserts = result.tests[0].assert;
307
+ // tool-called (→ javascript), contains × 2, llm-rubric
308
+ assert.equal(asserts.length, 4);
309
+ assert.equal(asserts[0].type, "javascript"); // tool-called → javascript
310
+ assert.equal(asserts[1].type, "contains");
311
+ assert.equal(asserts[2].type, "contains");
312
+ assert.equal(asserts[3].type, "llm-rubric");
313
+ });
314
+ it("schema task uses get_schema tool", () => {
315
+ const result = compileMCPTask(inspectSchemaTask);
316
+ const asserts = result.tests[0].assert;
317
+ assert.ok(asserts.some((a) => a.type === "javascript" && a.value.includes("get_schema")), "Should have tool-called assertion for get_schema");
318
+ });
319
+ it("create-publish task produces multi-turn test case", () => {
320
+ const result = compileMCPTask(createAndPublishTask);
321
+ // Primary + multi-turn
322
+ assert.equal(result.tests.length, 2);
323
+ assert.ok(result.tests[1].description?.includes("[multi-turn]"));
324
+ });
325
+ it("stdio task has Promptfoo-native MCP provider with command", () => {
326
+ const result = compileMCPTask(stdioServerTask);
327
+ assert.equal(result.providers[0].id, "mcp");
328
+ const config = result.providers[0].config;
329
+ assert.equal(config.enabled, true);
330
+ const server = config.server;
331
+ assert.equal(server.command, "node");
332
+ assert.deepEqual(server.args, ["dist/sanity-mcp-server.js"]);
333
+ });
334
+ it("semantic search task has two tool-called + one llm-rubric assertion", () => {
335
+ const result = compileMCPTask(semanticSearchTask);
336
+ const asserts = result.tests[0].assert;
337
+ // tool-called × 2 (→ javascript) + llm-rubric
338
+ assert.equal(asserts.length, 3);
339
+ assert.equal(asserts[0].type, "javascript"); // tool-called → javascript
340
+ assert.ok(asserts[0].value.includes("list_embeddings_indices"), "Should have tool-called assertion for list_embeddings_indices");
341
+ assert.equal(asserts[1].type, "javascript"); // tool-called → javascript
342
+ assert.ok(asserts[1].value.includes("semantic_search"), "Should have tool-called assertion for semantic_search");
343
+ assert.equal(asserts[2].type, "llm-rubric");
344
+ });
345
+ it("remote task has bearer auth and tools filter", () => {
346
+ const result = compileMCPTask(queryDocumentsTask);
347
+ const config = result.providers[0].config;
348
+ const server = config.server;
349
+ assert.deepEqual(server.auth, {
350
+ type: "bearer",
351
+ token: "{{env.SANITY_MCP_AUTH_TOKEN}}",
352
+ });
353
+ assert.deepEqual(config.tools, ["query_documents", "get_schema"]);
354
+ });
355
+ });
@@ -0,0 +1,9 @@
1
+ /**
2
+ * promptfoo-compiler.test.ts — Unit tests for the PromptfooCompiler.
3
+ *
4
+ * Tests compilation of a TaskGraph into Promptfoo configuration,
5
+ * including provider assembly, prompt generation, and test case creation.
6
+ *
7
+ * Run: npx tsx --test src/pipeline/compiler/__tests__/promptfoo-compiler.test.ts
8
+ */
9
+ export {};