@sanity/ailf 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288) hide show
  1. package/config/features.ts +23 -0
  2. package/config/models.ts +83 -0
  3. package/config/prompts.ts +16 -0
  4. package/config/rubrics.ts +225 -0
  5. package/config/schedules.ts +47 -0
  6. package/config/sinks.ts +37 -0
  7. package/config/sources.ts +21 -0
  8. package/config/thresholds.ts +61 -0
  9. package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
  10. package/dist/_vendor/ailf-core/config-helpers.js +150 -0
  11. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  12. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  13. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  14. package/dist/_vendor/ailf-core/index.js +5 -0
  15. package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
  16. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  17. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  18. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  19. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  20. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  21. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  22. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
  23. package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
  24. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
  25. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
  26. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -29
  27. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -8
  28. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  29. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  30. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  31. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  32. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  33. package/dist/_vendor/ailf-core/services/index.js +2 -1
  34. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  35. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  36. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  37. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  38. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  39. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  40. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  41. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  42. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
  43. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  44. package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
  45. package/dist/_vendor/ailf-core/types/index.js +8 -1
  46. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
  47. package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
  48. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  49. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  50. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  51. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  52. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  53. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  54. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  55. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  56. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  57. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  58. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  59. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  60. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  61. package/dist/_vendor/ailf-shared/index.js +0 -1
  62. package/dist/adapters/api-client/build-request.js +14 -13
  63. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  64. package/dist/adapters/config-sources/file-config-adapter.js +38 -12
  65. package/dist/adapters/config-sources/index.d.ts +2 -0
  66. package/dist/adapters/config-sources/index.js +1 -0
  67. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  68. package/dist/adapters/config-sources/ts-config-loader.js +133 -0
  69. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  70. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  71. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  72. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  73. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  74. package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
  75. package/dist/adapters/task-sources/index.d.ts +1 -0
  76. package/dist/adapters/task-sources/index.js +1 -0
  77. package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
  78. package/dist/adapters/task-sources/repo-task-source.js +69 -16
  79. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  80. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  81. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  82. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  83. package/dist/cli.js +0 -2
  84. package/dist/commands/baseline.js +4 -1
  85. package/dist/commands/calculate-scores.js +1 -1
  86. package/dist/commands/coverage-audit.js +7 -1
  87. package/dist/commands/explain-handler.js +25 -23
  88. package/dist/commands/fetch-docs.js +3 -2
  89. package/dist/commands/generate-configs.js +1 -1
  90. package/dist/commands/interactive.js +11 -7
  91. package/dist/commands/pipeline-action.d.ts +2 -0
  92. package/dist/commands/pipeline-action.js +16 -6
  93. package/dist/commands/pipeline.d.ts +1 -0
  94. package/dist/commands/pipeline.js +4 -2
  95. package/dist/commands/pr-comment.js +1 -1
  96. package/dist/commands/publish.js +2 -2
  97. package/dist/commands/readiness-report.js +13 -6
  98. package/dist/composition-root.d.ts +1 -1
  99. package/dist/composition-root.js +67 -4
  100. package/dist/orchestration/build-app-context.js +1 -0
  101. package/dist/orchestration/build-step-sequence.js +24 -6
  102. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  103. package/dist/orchestration/steps/fetch-docs-step.js +6 -4
  104. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  105. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  106. package/dist/orchestration/steps/generate-configs-step.js +245 -51
  107. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  108. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  109. package/dist/orchestration/steps/readiness-step.js +5 -6
  110. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  111. package/dist/orchestration/steps/run-eval-step.js +8 -7
  112. package/dist/pipeline/cache.d.ts +1 -1
  113. package/dist/pipeline/cache.js +36 -8
  114. package/dist/pipeline/calculate-scores.d.ts +2 -4
  115. package/dist/pipeline/calculate-scores.js +43 -113
  116. package/dist/pipeline/checks.js +2 -2
  117. package/dist/pipeline/compare.js +8 -8
  118. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  119. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  120. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  121. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  122. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  123. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  124. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  125. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  126. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  127. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
  128. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  129. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  130. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  131. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  132. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  133. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
  134. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  135. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  136. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  137. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  138. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  139. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  140. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  141. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  142. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  143. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  144. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  145. package/dist/pipeline/compiler/config-loader.js +111 -0
  146. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  147. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  148. package/dist/pipeline/compiler/hash.d.ts +11 -0
  149. package/dist/pipeline/compiler/hash.js +18 -0
  150. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  151. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  152. package/dist/pipeline/compiler/index.d.ts +29 -0
  153. package/dist/pipeline/compiler/index.js +45 -0
  154. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  155. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  156. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  157. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  158. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  159. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  160. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  161. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  162. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
  163. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
  164. package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
  165. package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
  166. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
  167. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
  168. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
  169. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
  170. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
  171. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
  172. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
  173. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
  174. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  175. package/dist/pipeline/compiler/presets/index.js +8 -0
  176. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
  177. package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
  178. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  179. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  180. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  181. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  182. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  183. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  184. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  185. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  186. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  187. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  188. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  189. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  190. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  191. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  192. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  193. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  194. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  195. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  196. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  197. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  198. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  199. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  200. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  201. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  202. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  203. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  204. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  205. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  206. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  207. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  208. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  209. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  210. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  211. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  212. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  213. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  214. package/dist/pipeline/coverage-audit.d.ts +15 -5
  215. package/dist/pipeline/coverage-audit.js +41 -22
  216. package/dist/pipeline/eval-constants.d.ts +16 -6
  217. package/dist/pipeline/eval-constants.js +25 -4
  218. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  219. package/dist/pipeline/eval-fingerprint.js +8 -9
  220. package/dist/pipeline/expand-tasks.d.ts +19 -10
  221. package/dist/pipeline/expand-tasks.js +34 -28
  222. package/dist/pipeline/gap-analysis.d.ts +1 -1
  223. package/dist/pipeline/gap-analysis.js +2 -2
  224. package/dist/pipeline/generate-configs.d.ts +22 -4
  225. package/dist/pipeline/generate-configs.js +53 -24
  226. package/dist/pipeline/grader-api.d.ts +3 -3
  227. package/dist/pipeline/grader-api.js +5 -12
  228. package/dist/pipeline/grader-compare-runner.js +20 -27
  229. package/dist/pipeline/grader-comparison.d.ts +4 -8
  230. package/dist/pipeline/grader-comparison.js +11 -17
  231. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  232. package/dist/pipeline/grader-consistency-runner.js +16 -20
  233. package/dist/pipeline/grader-consistency.d.ts +6 -10
  234. package/dist/pipeline/grader-consistency.js +13 -32
  235. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  236. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  237. package/dist/pipeline/grader-sensitivity.js +10 -10
  238. package/dist/pipeline/grader-validate-runner.js +7 -5
  239. package/dist/pipeline/grader-validation.d.ts +2 -6
  240. package/dist/pipeline/grader-validation.js +14 -22
  241. package/dist/pipeline/map-request-to-config.js +6 -1
  242. package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
  243. package/dist/pipeline/mirror-repo-tasks.js +16 -15
  244. package/dist/pipeline/normalize-mode.d.ts +49 -0
  245. package/dist/pipeline/normalize-mode.js +64 -0
  246. package/dist/pipeline/plan.d.ts +5 -2
  247. package/dist/pipeline/plan.js +134 -78
  248. package/dist/pipeline/pr-comment.js +2 -0
  249. package/dist/pipeline/profile-resolution.d.ts +22 -14
  250. package/dist/pipeline/profile-resolution.js +41 -19
  251. package/dist/pipeline/provenance.d.ts +2 -2
  252. package/dist/pipeline/provenance.js +12 -17
  253. package/dist/pipeline/release-report.js +4 -4
  254. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  255. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  256. package/dist/pipeline/rubric-loader.d.ts +20 -0
  257. package/dist/pipeline/rubric-loader.js +37 -0
  258. package/dist/pipeline/validate.d.ts +4 -4
  259. package/dist/pipeline/validate.js +64 -53
  260. package/dist/schedules/loader.js +18 -8
  261. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  262. package/dist/scripts/migrate-task-mode.js +85 -0
  263. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  264. package/dist/scripts/validate-task-sources.d.ts +1 -1
  265. package/dist/scripts/validate-task-sources.js +15 -15
  266. package/dist/sinks/loader.js +5 -7
  267. package/dist/sources.d.ts +7 -7
  268. package/dist/sources.js +22 -24
  269. package/dist/webhook/dispatch.js +2 -1
  270. package/package.json +6 -3
  271. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  272. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  273. package/tasks/literacy/frameworks.task.ts +128 -0
  274. package/tasks/literacy/functions.task.ts +69 -0
  275. package/tasks/literacy/groq.task.ts +258 -0
  276. package/tasks/literacy/nextjs-live.task.ts +75 -0
  277. package/tasks/literacy/studio-setup.task.ts +131 -0
  278. package/tasks/literacy/visual-editing.task.ts +146 -0
  279. package/config/features.yaml +0 -116
  280. package/config/models.yaml +0 -116
  281. package/config/prompts.yaml +0 -75
  282. package/config/rubrics.yaml +0 -81
  283. package/config/schedules.yaml +0 -43
  284. package/config/sinks.yaml +0 -54
  285. package/config/sources.yaml +0 -51
  286. package/config/thresholds.yaml +0 -49
  287. package/dist/agent-observer/test-imports.d.ts +0 -7
  288. package/dist/agent-observer/test-imports.js +0 -185
@@ -0,0 +1,277 @@
1
+ /**
2
+ * MCP-specific assertion types — ergonomic assertions for MCP server testing.
3
+ *
4
+ * Each assertion type compiles down to a Promptfoo `javascript` assertion
5
+ * with the appropriate validation logic. The developer writes:
6
+ *
7
+ * ```typescript
8
+ * assertions: [
9
+ * { type: "tool-called", value: "getDocument" },
10
+ * { type: "tool-input-matches", value: { documentId: "doc-123" } },
11
+ * { type: "tool-output-matches", value: { title: "Hello" } },
12
+ * { type: "error-returned", value: { code: -32602 } },
13
+ * ]
14
+ * ```
15
+ *
16
+ * The compiler transforms these into Promptfoo-compatible `javascript`
17
+ * assertions that inspect the tool call trace in the evaluation output.
18
+ *
19
+ * @see docs/exec-plans/architecture-overhaul/phase-3-mcp-server-mode.md
20
+ */
21
+ // ---------------------------------------------------------------------------
22
+ // Public API
23
+ // ---------------------------------------------------------------------------
24
/**
 * Compile a task's assertion definitions into Promptfoo assertion objects.
 *
 * Every definition is handed to `mapMCPAssertion` in order; falsy results
 * are discarded. MCP-specific types (tool-called, tool-input-matches, ...)
 * and standard types (contains, llm-rubric, ...) are both handled by that
 * mapper.
 *
 * @param {Iterable<object>} assertions - Assertion definitions from the task.
 * @param {object} context - Compilation context, forwarded untouched to the mapper.
 * @returns {{ assertions: object[], warnings: string[] }} The compiled
 *   assertions plus any warnings the mapper recorded along the way.
 */
export function buildMCPAssertions(assertions, context) {
  // Shared sink: the mapper appends to this array as a side effect.
  const warnings = [];
  const compiled = [...assertions]
    .map((definition) => mapMCPAssertion(definition, context, warnings))
    .filter(Boolean);
  return { assertions: compiled, warnings };
}
42
+ // ---------------------------------------------------------------------------
43
+ // Assertion mapping
44
+ // ---------------------------------------------------------------------------
45
/**
 * Map a single task assertion to its Promptfoo representation.
 *
 * - MCP-specific types are delegated to their dedicated builders.
 * - Known standard types are copied through with `value` and `weight`
 *   (when present); `llm-rubric` additionally receives
 *   `context.graderProvider` as its `provider` when one is configured.
 * - Any other type is also copied through, but a warning is recorded.
 *   `weight` is preserved here too, so an assertion type this mapper does
 *   not list explicitly does not silently lose its configured weight.
 *
 * @param {object} assertion - Assertion definition (`type`, optional `value`/`weight`).
 * @param {object} context - Compilation context; reads `taskId` and `graderProvider`.
 * @param {string[]} warnings - Mutated: receives a message for unknown types.
 * @returns {object} The Promptfoo assertion object.
 */
function mapMCPAssertion(assertion, context, warnings) {
  switch (assertion.type) {
    case "tool-called":
      return buildToolCalledAssertion(assertion, context);
    case "tool-input-matches":
      return buildToolInputMatchesAssertion(assertion, context);
    case "tool-output-matches":
      return buildToolOutputMatchesAssertion(assertion, context);
    case "error-returned":
      return buildErrorReturnedAssertion(assertion, context);
    case "capability-available":
      return buildCapabilityAssertion(assertion, context);
    // Standard assertions — pass through
    case "contains":
    case "equals":
    case "regex":
    case "is-json":
    case "llm-rubric":
    case "javascript":
    case "python":
      return {
        type: assertion.type,
        ...("value" in assertion ? { value: assertion.value } : {}),
        ...(assertion.weight !== undefined ? { weight: assertion.weight } : {}),
        ...(assertion.type === "llm-rubric" && context.graderProvider
          ? { provider: context.graderProvider }
          : {}),
      };
    default:
      warnings.push(`MCP task "${context.taskId}": unknown assertion type "${assertion.type}" — passed through`);
      return {
        type: assertion.type,
        ...("value" in assertion ? { value: assertion.value } : {}),
        // Keep the weight: "passed through" must not change how the assertion is scored.
        ...(assertion.weight !== undefined ? { weight: assertion.weight } : {}),
      };
  }
}
81
+ // ---------------------------------------------------------------------------
82
+ // tool-called — asserts the model called a specific tool by name
83
+ // ---------------------------------------------------------------------------
84
/**
 * Compile a `tool-called` assertion.
 *
 * The generated script reads `context.vars.__toolCalls` (treated as empty
 * when falsy) and passes when some entry's `name` equals the expected tool
 * name, which is `String(assertion.value ?? "")`.
 *
 * @param {object} assertion - Reads `value` (tool name) and optional `weight`.
 * @param {object} _context - Unused.
 * @returns {object} A Promptfoo `javascript` assertion; `weight` is set only
 *   when the source assertion defines one.
 */
function buildToolCalledAssertion(assertion, _context) {
  const expectedTool = String(assertion.value ?? "");
  // JS string literal of the tool name, used for the comparison.
  const nameLiteral = JSON.stringify(expectedTool);
  // Stringified twice so the quotes show up inside the human-readable reason text.
  const quotedNameLiteral = JSON.stringify(nameLiteral);
  const script = `
    const toolCalls = context.vars.__toolCalls || [];
    const called = toolCalls.some(tc => tc.name === ${nameLiteral});
    return {
      pass: called,
      score: called ? 1 : 0,
      reason: called
        ? 'Tool ' + ${quotedNameLiteral} + ' was called as expected'
        : 'Expected tool ' + ${quotedNameLiteral} + ' to be called, but it was not. ' +
          'Tools called: ' + (toolCalls.map(tc => tc.name).join(', ') || 'none'),
    };`;
  const compiled = {
    type: "javascript",
    value: buildJsAssertion(`tool-called: ${expectedTool}`, script),
  };
  if (assertion.weight !== undefined) {
    compiled.weight = assertion.weight;
  }
  return compiled;
}
102
+ // ---------------------------------------------------------------------------
103
+ // tool-input-matches — asserts tool call inputs match a schema/value
104
+ // ---------------------------------------------------------------------------
105
/**
 * Compile a `tool-input-matches` assertion.
 *
 * The generated script reads `context.vars.__toolCalls`, optionally narrows
 * it to calls whose `name` equals `assertion.toolName ?? assertion.tool`,
 * and passes when at least one remaining call's input (`tc.input`, falling
 * back to `tc.arguments`) has, for every key of the expected value, a
 * property whose JSON serialization equals the expected one.
 *
 * A missing or null `value` is treated as `{}` (no constraints), matching
 * how `error-returned` treats an absent value. Previously the generated
 * script evaluated `Object.entries(undefined)` / `Object.entries(null)` and
 * threw when the assertion ran.
 *
 * @param {object} assertion - Reads `value`, `toolName`/`tool`, optional `weight`.
 * @param {object} _context - Unused.
 * @returns {object} A Promptfoo `javascript` assertion; `weight` is forwarded
 *   when defined.
 */
function buildToolInputMatchesAssertion(assertion, _context) {
  const expected = assertion.value ?? {};
  const toolName = assertion.toolName ?? assertion.tool;
  return {
    type: "javascript",
    value: buildJsAssertion(`tool-input-matches${toolName ? `: ${toolName}` : ""}`, `
    const toolCalls = context.vars.__toolCalls || [];
    const expected = ${JSON.stringify(expected)};
    const toolFilter = ${JSON.stringify(toolName ?? null)};

    const targetCalls = toolFilter
      ? toolCalls.filter(tc => tc.name === toolFilter)
      : toolCalls;

    if (targetCalls.length === 0) {
      return {
        pass: false,
        score: 0,
        reason: toolFilter
          ? 'No calls to tool "' + toolFilter + '" found'
          : 'No tool calls found',
      };
    }

    // Check if any call's input matches the expected value
    const match = targetCalls.some(tc => {
      const input = tc.input || tc.arguments || {};
      return Object.entries(expected).every(([k, v]) =>
        JSON.stringify(input[k]) === JSON.stringify(v)
      );
    });

    return {
      pass: match,
      score: match ? 1 : 0,
      reason: match
        ? 'Tool input matches expected values'
        : 'Tool input does not match. Expected: ' + JSON.stringify(expected) +
          ', Got: ' + JSON.stringify(targetCalls.map(tc => tc.input || tc.arguments)),
    };`),
    ...(assertion.weight !== undefined ? { weight: assertion.weight } : {}),
  };
}
148
+ // ---------------------------------------------------------------------------
149
+ // tool-output-matches — asserts tool outputs match expected shape/values
150
+ // ---------------------------------------------------------------------------
151
/**
 * Compile a `tool-output-matches` assertion.
 *
 * The generated script reads `context.vars.__toolCalls`, optionally narrows
 * it to calls whose `name` equals `assertion.toolName ?? assertion.tool`,
 * and passes when at least one remaining call's output (`tc.output`,
 * falling back to `tc.result`) has, for every key of the expected value, a
 * property whose JSON serialization equals the expected one.
 *
 * A missing or null `value` is treated as `{}` (no constraints), matching
 * how `error-returned` treats an absent value. Previously the generated
 * script evaluated `Object.entries(undefined)` / `Object.entries(null)` and
 * threw when the assertion ran.
 *
 * @param {object} assertion - Reads `value`, `toolName`/`tool`, optional `weight`.
 * @param {object} _context - Unused.
 * @returns {object} A Promptfoo `javascript` assertion; `weight` is forwarded
 *   when defined.
 */
function buildToolOutputMatchesAssertion(assertion, _context) {
  const expected = assertion.value ?? {};
  const toolName = assertion.toolName ?? assertion.tool;
  return {
    type: "javascript",
    value: buildJsAssertion(`tool-output-matches${toolName ? `: ${toolName}` : ""}`, `
    const toolCalls = context.vars.__toolCalls || [];
    const expected = ${JSON.stringify(expected)};
    const toolFilter = ${JSON.stringify(toolName ?? null)};

    const targetCalls = toolFilter
      ? toolCalls.filter(tc => tc.name === toolFilter)
      : toolCalls;

    if (targetCalls.length === 0) {
      return {
        pass: false,
        score: 0,
        reason: toolFilter
          ? 'No calls to tool "' + toolFilter + '" found'
          : 'No tool calls found',
      };
    }

    const match = targetCalls.some(tc => {
      const output = tc.output || tc.result || {};
      return Object.entries(expected).every(([k, v]) =>
        JSON.stringify(output[k]) === JSON.stringify(v)
      );
    });

    return {
      pass: match,
      score: match ? 1 : 0,
      reason: match
        ? 'Tool output matches expected values'
        : 'Tool output does not match. Expected: ' + JSON.stringify(expected),
    };`),
    ...(assertion.weight !== undefined ? { weight: assertion.weight } : {}),
  };
}
192
+ // ---------------------------------------------------------------------------
193
+ // error-returned — asserts the server returned a specific error
194
+ // ---------------------------------------------------------------------------
195
/**
 * Compile an `error-returned` assertion.
 *
 * The generated script collects the `error` of every entry in
 * `context.vars.__toolCalls` that has one and passes when at least one of
 * them satisfies the expected pattern:
 *   - `expected.code`, when given, must strictly equal `error.code`;
 *   - `expected.message`, when given, must be a substring of a string
 *     `error.message`.
 * A missing or null `value` means "any error".
 *
 * Every errored call is considered, not just the first, so an unrelated
 * earlier error no longer masks a later matching one. This mirrors the
 * any-call semantics of the tool input/output matchers. When nothing
 * matches, the reason describes the mismatch against the first error.
 *
 * @param {object} assertion - Reads `value` (`{ code?, message? }`) and optional `weight`.
 * @param {object} _context - Unused.
 * @returns {object} A Promptfoo `javascript` assertion; `weight` is forwarded
 *   when defined.
 */
function buildErrorReturnedAssertion(assertion, _context) {
  const expected = assertion.value;
  return {
    type: "javascript",
    value: buildJsAssertion("error-returned", `
    const toolCalls = context.vars.__toolCalls || [];
    const expected = ${JSON.stringify(expected ?? {})};

    const errors = toolCalls.filter(tc => tc.error).map(tc => tc.error);
    if (errors.length === 0) {
      return {
        pass: false,
        score: 0,
        reason: 'Expected an error response but no errors were returned',
      };
    }

    // Lists how one error deviates from the expectation; empty means it matches.
    const describeMismatch = (error) => {
      const reasons = [];

      if (expected.code !== undefined && error.code !== expected.code) {
        reasons.push('Expected error code ' + expected.code + ', got ' + error.code);
      }

      if (expected.message !== undefined) {
        const msgMatch = typeof error.message === 'string' &&
          error.message.includes(expected.message);
        if (!msgMatch) {
          reasons.push('Expected error message containing "' + expected.message +
            '", got "' + (error.message || '') + '"');
        }
      }

      return reasons;
    };

    const mismatches = errors.map(describeMismatch);
    const pass = mismatches.some(reasons => reasons.length === 0);

    return {
      pass,
      score: pass ? 1 : 0,
      reason: pass ? 'Error matches expected pattern' : mismatches[0].join('; '),
    };`),
    ...(assertion.weight !== undefined ? { weight: assertion.weight } : {}),
  };
}
243
+ // ---------------------------------------------------------------------------
244
+ // capability-available — asserts the server advertises a capability
245
+ // ---------------------------------------------------------------------------
246
/**
 * Compile a `capability-available` assertion.
 *
 * The generated script reads `context.vars.__serverCapabilities` (treated
 * as empty when falsy) and passes when it `includes` the expected
 * capability, which is `String(assertion.value ?? "")`.
 *
 * NOTE(review): the script calls `.includes` and `.join` on the capabilities
 * value, so it presumably expects an array of names — confirm against
 * whatever populates `__serverCapabilities`.
 *
 * @param {object} assertion - Reads `value` (capability name) and optional `weight`.
 * @param {object} _context - Unused.
 * @returns {object} A Promptfoo `javascript` assertion; `weight` is set only
 *   when the source assertion defines one.
 */
function buildCapabilityAssertion(assertion, _context) {
  const wanted = String(assertion.value ?? "");
  const wantedLiteral = JSON.stringify(wanted);
  const script = `
    const capabilities = context.vars.__serverCapabilities || [];
    const expected = ${wantedLiteral};
    const available = capabilities.includes(expected);

    return {
      pass: available,
      score: available ? 1 : 0,
      reason: available
        ? 'Server advertises capability "' + expected + '"'
        : 'Server does not advertise capability "' + expected + '". ' +
          'Available: ' + (capabilities.join(', ') || 'none'),
    };`;
  const compiled = {
    type: "javascript",
    value: buildJsAssertion(`capability-available: ${wanted}`, script),
  };
  if (assertion.weight !== undefined) {
    compiled.weight = assertion.weight;
  }
  return compiled;
}
266
+ // ---------------------------------------------------------------------------
267
+ // Helpers
268
+ // ---------------------------------------------------------------------------
269
/**
 * Build the source text of a Promptfoo `javascript` assertion.
 *
 * The result is a one-line `// MCP assertion: <label>` comment followed by
 * an immediately-invoked function whose body is `body` (trimmed). The
 * wrapper function takes no parameters; `body` refers to `output` and
 * `context` as free variables that the assertion runner must have in scope.
 *
 * The label is built from task-supplied text (tool and capability names),
 * so line terminators in it are collapsed to a space. Otherwise a name
 * containing a newline would end the `//` comment early and the rest of the
 * label would run as code.
 *
 * NOTE(review): Promptfoo documents that multi-line `javascript` assertion
 * values need an explicit `return`; this text has no top-level `return` —
 * confirm the consumer evaluates it as an expression.
 *
 * @param {string} label - Human-readable tag placed in the leading comment.
 * @param {string} body - Statements forming the function body; should `return` the result.
 * @returns {string} The assertion source text.
 */
function buildJsAssertion(label, body) {
  // \u2028 and \u2029 also terminate a single-line comment in JavaScript.
  const safeLabel = String(label).replace(/[\r\n\u2028\u2029]+/g, " ");
  return `// MCP assertion: ${safeLabel}\n(function() {\n${body.trim()}\n})()`;
}
@@ -0,0 +1,67 @@
1
+ /**
2
+ * MCPServerModeHandler — compilation rules for `mcp-server` evaluation mode.
3
+ *
4
+ * This is the first non-literacy mode handler, proving the compiler
5
+ * architecture works end-to-end. It translates MCP server task definitions
6
+ * into Promptfoo configuration with:
7
+ *
8
+ * - An MCP provider that wraps the server under test
9
+ * - Tool-call assertions compiled to Promptfoo `javascript` assertions
10
+ * - Server lifecycle management via Promptfoo provider hooks
11
+ * - Multi-turn conversation support via Promptfoo's `steps` syntax
12
+ *
13
+ * Promptfoo supports MCP servers as providers natively:
14
+ * ```yaml
15
+ * providers:
16
+ * - id: mcp:./my-server
17
+ * config:
18
+ * command: node
19
+ * args: [./dist/server.js]
20
+ * env: { API_KEY: "..." }
21
+ * ```
22
+ *
23
+ * This handler assembles that config from AILF's `MCPServerTaskDefinition`.
24
+ *
25
+ * @see docs/exec-plans/architecture-overhaul/phase-3-mcp-server-mode.md
26
+ * @see packages/core/src/types/eval-mode-config.ts — MCPServerModeConfig
27
+ * @see packages/core/src/types/generalized-task.ts — MCPServerTaskDefinition
28
+ */
29
+ import type { MCPServerTaskDefinition, ModeHandler, PromptTemplate } from "../../../_vendor/ailf-core/index.d.ts";
30
+ import type { PromptfooPrompt, PromptfooProvider, PromptfooTestCase } from "../promptfoo-compiler.js";
31
+ export declare const MCP_PROMPT_TEMPLATES: Record<string, PromptTemplate>;
32
+ /** Optional settings for compiling an MCP server task; accepted as the optional second argument of `compileMCPTask`. */
33
+ export interface MCPCompileOptions {
34
+ /** Grader provider for LLM-graded assertions (presumably a Promptfoo provider id — TODO confirm against the handler) */
35
+ graderProvider?: string;
36
+ }
37
+ /** Result of compiling a single MCP task; this is the return type of `compileMCPTask`. */
38
+ export interface MCPCompileResult {
39
+ /** Promptfoo provider config for the MCP server */
40
+ providers: PromptfooProvider[];
41
+ /** Compiled Promptfoo test cases */
42
+ tests: PromptfooTestCase[];
43
+ /** Prompts for MCP evaluation */
44
+ prompts: PromptfooPrompt[];
45
+ /** Human-readable warnings generated during compilation */
46
+ warnings: string[];
47
+ }
48
+ /** A single validation problem for an MCP task definition; `validateMCPTask` returns an array of these. Presumably `field` names the offending task property and `message` describes the problem — confirm against the implementation. */
49
+ export interface MCPValidationError {
50
+ field: string;
51
+ message: string;
52
+ }
53
+ /**
54
+ * Validate that an MCP task definition has all required fields.
55
+ */
56
+ export declare function validateMCPTask(task: MCPServerTaskDefinition): MCPValidationError[];
57
+ /**
58
+ * Compile an MCP server task definition into Promptfoo configuration.
59
+ *
60
+ * This is the core of the MCP mode handler. It produces:
61
+ * 1. A provider config pointing to the MCP server
62
+ * 2. Test cases with tool-call assertions
63
+ * 3. Appropriate prompts for the evaluation
64
+ */
65
+ export declare function compileMCPTask(task: MCPServerTaskDefinition, options?: MCPCompileOptions): MCPCompileResult;
66
+ /** ModeHandler-conformant export for the mcp-server evaluation mode. */
67
+ export declare const handler: ModeHandler;
@@ -0,0 +1,309 @@
1
+ /**
2
+ * MCPServerModeHandler — compilation rules for `mcp-server` evaluation mode.
3
+ *
4
+ * This is the first non-literacy mode handler, proving the compiler
5
+ * architecture works end-to-end. It translates MCP server task definitions
6
+ * into Promptfoo configuration with:
7
+ *
8
+ * - An MCP provider that wraps the server under test
9
+ * - Tool-call assertions compiled to Promptfoo `javascript` assertions
10
+ * - Server lifecycle management via Promptfoo provider hooks
11
+ * - Multi-turn conversation support via Promptfoo's `steps` syntax
12
+ *
13
+ * Promptfoo supports MCP servers as providers natively:
14
+ * ```yaml
15
+ * providers:
16
+ * - id: mcp
17
+ * config:
18
+ * enabled: true
19
+ * server: { name: my-task, command: node, args: [./dist/server.js] }
20
+ * # optional: server.url, server.auth, server.headers, tools, timeout
21
+ * ```
22
+ *
23
+ * This handler assembles that config from AILF's `MCPServerTaskDefinition`.
24
+ *
25
+ * @see docs/exec-plans/architecture-overhaul/phase-3-mcp-server-mode.md
26
+ * @see packages/core/src/types/eval-mode-config.ts — MCPServerModeConfig
27
+ * @see packages/core/src/types/generalized-task.ts — MCPServerTaskDefinition
28
+ */
29
+ import { buildMCPAssertions, } from "./mcp-assertions.js";
30
+ // ---------------------------------------------------------------------------
31
+ // Canonical MCP server prompt templates
32
+ // ---------------------------------------------------------------------------
33
+ // Handler-owned prompts for MCP server evaluations, keyed by template id. The "mcp-server" template instructs the model to
34
+ // interact with MCP tools rather than writing standalone code; {{task}} is its only declared variable.
35
+ export const MCP_PROMPT_TEMPLATES = {
36
+ "mcp-server": {
37
+ id: "mcp-server",
38
+ label: "MCP Server Tool Use",
39
+ template: `You are an AI assistant with access to an MCP (Model Context Protocol) server that provides tools for interacting with a Sanity content backend.
40
+
41
+ ## Task
42
+ {{task}}
43
+
44
+ ## Instructions
45
+
46
+ 1. Use the available MCP tools to complete the task
47
+ 2. Call tools with the correct parameters as described in their schemas
48
+ 3. Interpret tool responses and use the results to accomplish the goal
49
+ 4. If a tool returns an error, explain the issue clearly
50
+ 5. Prefer using specific tools over broad queries when possible
51
+
52
+ Complete the task using the MCP tools provided:
53
+ `,
54
+ variables: ["task"],
55
+ },
56
+ };
57
/**
 * Validate that an MCP task definition has all required fields.
 *
 * Checks performed:
 * - `id` and `title` must be truthy.
 * - When `serverConfig` is present: `stdio` transport requires `command`;
 *   any other transport requires `url`, because the provider builder in this
 *   file treats every non-stdio transport as a URL-based connection.
 * - Every entry in `assertions` must be an object, and a `tool-called`
 *   assertion must carry a truthy `value` (the tool name).
 *
 * The function never throws on malformed assertion entries; it reports them.
 *
 * @param task - The MCP server task definition to check.
 * @returns One `{ field, message }` entry per problem; empty when none found.
 */
export function validateMCPTask(task) {
    const errors = [];
    if (!task.id) {
        errors.push({ field: "id", message: "Task ID is required" });
    }
    if (!task.title) {
        errors.push({ field: "title", message: "Task title is required" });
    }
    if (task.serverConfig) {
        const { transport, command, url } = task.serverConfig;
        if (transport === "stdio") {
            if (!command) {
                errors.push({
                    field: "serverConfig.command",
                    message: "Server command is required for stdio transport (e.g., 'node dist/server.js')",
                });
            }
        }
        else if (!url) {
            // Covers sse / streamable-http as before, plus a missing or unknown
            // transport, which would otherwise compile to a provider without a URL.
            errors.push({
                field: "serverConfig.url",
                message: `Server URL is required for ${transport ?? "non-stdio"} transport`,
            });
        }
    }
    // Assertions should reference MCP-compatible types
    if (task.assertions) {
        for (const assertion of task.assertions) {
            if (assertion === null || typeof assertion !== "object") {
                // Report instead of crashing on `assertion.type` below.
                errors.push({
                    field: "assertions",
                    message: "each assertion must be an object with a \"type\"",
                });
                continue;
            }
            if (assertion.type === "tool-called" && !assertion.value) {
                errors.push({
                    field: "assertions",
                    message: 'tool-called assertion requires a "value" specifying the tool name',
                });
            }
        }
    }
    return errors;
}
97
+ // ---------------------------------------------------------------------------
98
+ // Compilation
99
+ // ---------------------------------------------------------------------------
100
/**
 * Turn one MCP server task definition into Promptfoo configuration pieces.
 *
 * The result bundles:
 * 1. the provider entry for the MCP server under test,
 * 2. the test cases (with compiled tool-call assertions), and
 * 3. the prompts used for the evaluation.
 *
 * Validation problems do not abort compilation; each one is surfaced as a
 * warning string prefixed with the task id.
 *
 * @param task - MCP server task definition.
 * @param options - Optional compile options (e.g. `graderProvider`).
 * @returns `{ providers, tests, prompts, warnings }`.
 */
export function compileMCPTask(task, options) {
    // Seed the warning list with every validation error, in order.
    const warnings = validateMCPTask(task).map(({ field, message }) => `MCP task "${task.id}": ${field} — ${message}`);
    // Provider first, then prompts, then tests — same order as warnings accrue.
    const providers = buildMCPProvider(task, warnings);
    const prompts = buildMCPPrompts(task);
    const tests = buildMCPTestCases(task, options, warnings);
    return { providers, tests, prompts, warnings };
}
125
+ // ---------------------------------------------------------------------------
126
+ // Provider assembly
127
+ // ---------------------------------------------------------------------------
128
/**
 * Build a Promptfoo-native MCP provider config.
 *
 * Promptfoo supports MCP servers natively via `id: "mcp"` with a
 * structured config. See: https://www.promptfoo.dev/docs/providers/mcp/
 *
 * Key config shape:
 *   { enabled: true, server: { url?, command?, args?, name?, auth?, headers? },
 *     tools?, exclude_tools?, timeout?, debug? }
 *
 * @param task - MCP server task definition; reads `id`, `title`,
 *   `serverConfig` and `capabilities`.
 * @param warnings - Mutable list that receives human-readable warnings.
 * @returns A single-element array holding the provider entry.
 */
function buildMCPProvider(task, warnings) {
    const config = task.serverConfig;
    const label = `MCP Server: ${task.title}`;
    if (!config) {
        warnings.push(`MCP task "${task.id}": no serverConfig — using placeholder provider. ` +
            "Set serverConfig.command or serverConfig.url to point to your MCP server.");
        return [
            {
                id: "mcp",
                label,
                config: { enabled: true, server: { name: task.id } },
            },
        ];
    }
    // Build the server sub-config (Promptfoo's native format)
    const server = { name: task.id };
    if (config.transport === "stdio") {
        // Promptfoo expects command + args as separate fields. Trim and drop
        // empty tokens so leading/trailing whitespace or an empty command does
        // not produce an empty-string executable (which would bypass the
        // "node" fallback, since `??` only catches null/undefined).
        const parts = String(config.command ?? "")
            .trim()
            .split(/\s+/)
            .filter(Boolean);
        server.command = parts[0] ?? "node";
        if (parts.length > 1) {
            server.args = parts.slice(1);
        }
    }
    else {
        // sse or streamable-http — use URL-based connection
        server.url = config.url;
    }
    // Auth config (Promptfoo supports bearer, basic, api_key, oauth)
    if (config.auth) {
        server.auth = config.auth;
    }
    else if (config.env) {
        // Backward compat: if env has a token-like variable, convert to
        // bearer auth using Promptfoo's {{env.VAR}} template syntax
        const tokenKey = Object.keys(config.env).find((k) => /token|auth|key/i.test(k));
        if (tokenKey) {
            // Coerce so a non-string value is rejected below instead of throwing.
            const val = String(config.env[tokenKey] ?? "");
            // Convert $env(VAR) syntax to Promptfoo's {{env.VAR}} syntax
            const envVar = val.startsWith("$env(") && val.endsWith(")")
                ? val.slice(5, -1) // $env(VAR_NAME) → VAR_NAME
                : val;
            // NOTE(review): a value without the $env(...) wrapper is treated as
            // an env var *name*, as before — confirm this is intended for
            // literal token values.
            if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(envVar)) {
                // The value may be a literal secret, so it is deliberately not
                // echoed; the env key is enough to locate the problem.
                warnings.push(`MCP task "${task.id}": serverConfig.env.${tokenKey} does not name a valid ` +
                    "environment variable identifier — skipping auth config");
            }
            else {
                server.auth = {
                    type: "bearer",
                    token: `{{env.${envVar}}}`,
                };
            }
        }
    }
    // Custom headers: env keys prefixed with "header", "header_" or "header."
    if (config.env) {
        // Anchored so detection and prefix-stripping agree on what a header key is.
        const headerPrefix = /^header[_.]?/i;
        const headers = {};
        for (const [key, val] of Object.entries(config.env)) {
            if (!headerPrefix.test(key)) {
                continue;
            }
            const headerName = key.replace(headerPrefix, "");
            // A bare "header" key would yield an empty header name; skip it.
            if (headerName) {
                headers[headerName] = val;
            }
        }
        if (Object.keys(headers).length > 0) {
            server.headers = headers;
        }
    }
    // Build top-level provider config
    const providerConfig = {
        enabled: true,
        server,
    };
    // Tool filtering — map AILF capabilities to Promptfoo tools
    if (task.capabilities && task.capabilities.length > 0) {
        providerConfig.tools = task.capabilities;
    }
    // Timeout
    if (config.startupTimeoutMs) {
        providerConfig.timeout = config.startupTimeoutMs;
    }
    return [
        {
            id: "mcp",
            label,
            config: providerConfig,
        },
    ];
}
226
+ // ---------------------------------------------------------------------------
227
+ // Prompt assembly
228
+ // ---------------------------------------------------------------------------
229
/**
 * Produce the single prompt used in MCP mode.
 *
 * The prompt text is the first non-nullish value among `prompt.text`,
 * `prompt.vars.task`, `description`, and a title-based default; it is
 * stringified before being stored in `raw`.
 *
 * @param task - MCP server task definition.
 * @returns A one-element array with the prompt entry (id "mcp-test").
 */
function buildMCPPrompts(task) {
    const { prompt, description, title } = task;
    // Walk the fallback chain explicitly; only null/undefined fall through.
    let text = prompt?.text;
    if (text === null || text === undefined) {
        text = prompt?.vars?.task;
    }
    if (text === null || text === undefined) {
        text = description;
    }
    if (text === null || text === undefined) {
        text = `Test MCP server: ${title}`;
    }
    const entry = {
        id: "mcp-test",
        label: `MCP: ${title}`,
        raw: String(text),
    };
    return [entry];
}
243
+ // ---------------------------------------------------------------------------
244
+ // Test case assembly
245
+ // ---------------------------------------------------------------------------
246
/**
 * Assemble the Promptfoo test cases for one MCP task.
 *
 * Always emits a primary test case; additionally emits a "[multi-turn]" case
 * (carrying the turns in the `__multiTurn` var) when `task.multiTurn.turns`
 * is non-empty. Compiled assertions, if any, are attached to every case.
 *
 * @param task - MCP server task definition.
 * @param options - Optional compile options; `graderProvider` is forwarded
 *   to the assertion builder.
 * @param warnings - Mutable list that receives assertion-compilation warnings.
 * @returns The list of test cases.
 */
function buildMCPTestCases(task, options, warnings) {
    const tests = [];
    // Compile assertions
    // Cast GeneralizedAssertionDefinition[] → AssertionInput[] (structurally compatible)
    const assertions = [];
    if (task.assertions) {
        const assertionContext = {
            capabilities: task.capabilities ?? [],
            graderProvider: options?.graderProvider,
            taskId: task.id,
        };
        const { assertions: mapped, warnings: assertionWarnings } = buildMCPAssertions(task.assertions, assertionContext);
        assertions.push(...mapped);
        warnings.push(...assertionWarnings);
    }
    // Build test case vars
    const promptVars = task.prompt?.vars ?? {};
    const taskText = promptVars.task ?? task.description ?? `Test: ${task.title}`;
    const vars = {
        task: taskText,
        ...promptVars,
    };
    // The spread re-applies `prompt.vars.task` even when it is explicitly
    // null/undefined, which would clobber the fallback; restore it.
    if (vars.task === null || vars.task === undefined) {
        vars.task = taskText;
    }
    // Each test case gets its own assertion list so that mutating one case's
    // `assert` array downstream cannot affect the other.
    const withAssertions = () => (assertions.length > 0 ? { assert: [...assertions] } : {});
    // Primary test case
    tests.push({
        description: `${task.id} — ${task.title}`,
        vars,
        ...withAssertions(),
    });
    // Multi-turn test cases
    if (task.multiTurn?.turns && task.multiTurn.turns.length > 0) {
        tests.push({
            description: `${task.id} — ${task.title} [multi-turn]`,
            vars: {
                ...vars,
                __multiTurn: task.multiTurn.turns,
            },
            ...withAssertions(),
        });
    }
    return tests;
}
287
+ // ---------------------------------------------------------------------------
288
+ // ModeHandler adapter
289
+ // ---------------------------------------------------------------------------
290
/**
 * ModeHandler adapter for the `mcp-server` evaluation mode.
 *
 * - `getPrompts()` returns the handler-owned prompt template map.
 * - `compileTask(task, ctx)` rejects tasks whose `mode` is not "mcp-server"
 *   (throws `Error`), otherwise compiles the task, forwarding
 *   `ctx.graderProvider`, and returns `{ providers, tests, prompts, warnings }`.
 */
export const handler = {
    getPrompts() {
        return MCP_PROMPT_TEMPLATES;
    },
    compileTask(task, ctx) {
        const isMCPServerTask = "mode" in task && task.mode === "mcp-server";
        if (!isMCPServerTask) {
            throw new Error(`MCP server handler received task with mode "${task.mode ?? "undefined"}" — expected "mcp-server"`);
        }
        const compileOptions = { graderProvider: ctx.graderProvider };
        // Re-pack into a fresh object exposing only the four contract fields.
        const { providers, tests, prompts, warnings } = compileMCPTask(task, compileOptions);
        return { providers, tests, prompts, warnings };
    },
};
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Presets — bundled evaluation capabilities.
3
+ *
4
+ * Each preset packages modes, assertions, rubric templates, fixture
5
+ * resolvers, prompt templates, scoring profiles, doc fetcher, source
6
+ * definitions, and feature definitions into a single installable unit.
7
+ */
8
+ export { createSanityLiteracyPreset, registerSanityLiteracyPreset, sanityLiteracyPreset, } from "./sanity-literacy.js";
9
+ export type { SanityLiteracyPresetOptions } from "./sanity-literacy.js";
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Presets — bundled evaluation capabilities.
3
+ *
4
+ * Each preset packages modes, assertions, rubric templates, fixture
5
+ * resolvers, prompt templates, scoring profiles, doc fetcher, source
6
+ * definitions, and feature definitions into a single installable unit.
7
+ */
8
+ export { createSanityLiteracyPreset, registerSanityLiteracyPreset, sanityLiteracyPreset, } from "./sanity-literacy.js";