@sanity/ailf 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/LICENSE +21 -0
  2. package/dist/cli.js +0 -0
  3. package/dist/orchestration/steps/run-eval-step.js +1 -1
  4. package/dist/pipeline/checks.d.ts +8 -3
  5. package/dist/pipeline/checks.js +23 -3
  6. package/package.json +25 -25
  7. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +0 -10
  8. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +0 -185
  9. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +0 -6
  10. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +0 -42
  11. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  12. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  13. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  14. package/dist/_vendor/ailf-tasks/index.js +0 -16
  15. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  16. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  17. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  18. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  19. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  20. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  21. package/dist/adapters/task-sources/yaml-task-source.d.ts +0 -18
  22. package/dist/adapters/task-sources/yaml-task-source.js +0 -139
  23. package/dist/agent-observer/test-imports.d.ts +0 -7
  24. package/dist/agent-observer/test-imports.js +0 -185
  25. package/dist/commands/update-quality-scores.d.ts +0 -5
  26. package/dist/commands/update-quality-scores.js +0 -20
  27. package/dist/lib/agent-behavior-report.d.ts +0 -8
  28. package/dist/lib/agent-behavior-report.js +0 -185
  29. package/dist/lib/baseline.d.ts +0 -19
  30. package/dist/lib/baseline.js +0 -153
  31. package/dist/lib/calculate-scores.d.ts +0 -23
  32. package/dist/lib/calculate-scores.js +0 -42
  33. package/dist/lib/compare.d.ts +0 -18
  34. package/dist/lib/compare.js +0 -170
  35. package/dist/lib/coverage-audit.d.ts +0 -4
  36. package/dist/lib/coverage-audit.js +0 -42
  37. package/dist/lib/discovery-report.d.ts +0 -13
  38. package/dist/lib/discovery-report.js +0 -57
  39. package/dist/lib/fetch-docs.d.ts +0 -30
  40. package/dist/lib/fetch-docs.js +0 -171
  41. package/dist/lib/generate-configs.d.ts +0 -25
  42. package/dist/lib/generate-configs.js +0 -42
  43. package/dist/lib/grader-api.d.ts +0 -21
  44. package/dist/lib/grader-api.js +0 -34
  45. package/dist/lib/grader-compare.d.ts +0 -19
  46. package/dist/lib/grader-compare.js +0 -91
  47. package/dist/lib/grader-consistency.d.ts +0 -27
  48. package/dist/lib/grader-consistency.js +0 -79
  49. package/dist/lib/grader-sensitivity.d.ts +0 -19
  50. package/dist/lib/grader-sensitivity.js +0 -75
  51. package/dist/lib/grader-validate.d.ts +0 -19
  52. package/dist/lib/grader-validate.js +0 -78
  53. package/dist/lib/measure-retrieval.d.ts +0 -14
  54. package/dist/lib/measure-retrieval.js +0 -71
  55. package/dist/lib/pr-comment.d.ts +0 -16
  56. package/dist/lib/pr-comment.js +0 -28
  57. package/dist/lib/readiness-report.d.ts +0 -13
  58. package/dist/lib/readiness-report.js +0 -108
  59. package/dist/lib/webhook-server.d.ts +0 -11
  60. package/dist/lib/webhook-server.js +0 -24
  61. package/dist/lib/weekly-digest.d.ts +0 -24
  62. package/dist/lib/weekly-digest.js +0 -148
  63. package/dist/orchestration/env-bridge.d.ts +0 -21
  64. package/dist/orchestration/env-bridge.js +0 -66
  65. package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
  66. package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
  67. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +0 -9
  68. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +0 -339
  69. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +0 -70
  70. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
  71. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
  72. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
  73. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
  74. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
  75. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +0 -50
  76. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +0 -334
  77. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -69
  78. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -307
  79. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +0 -65
  80. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +0 -368
  81. package/dist/pipeline/compiler/task-bridge.d.ts +0 -41
  82. package/dist/pipeline/compiler/task-bridge.js +0 -92
  83. package/dist/pipeline/expand-tasks.d.ts +0 -232
  84. package/dist/pipeline/expand-tasks.js +0 -467
  85. package/dist/pipeline/generate-configs.d.ts +0 -92
  86. package/dist/pipeline/generate-configs.js +0 -445
  87. package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
  88. package/dist/pipeline/steps/calculate-scores-step.js +0 -89
  89. package/dist/pipeline/steps/compare-step.d.ts +0 -18
  90. package/dist/pipeline/steps/compare-step.js +0 -90
  91. package/dist/pipeline/steps/eval-step.d.ts +0 -53
  92. package/dist/pipeline/steps/eval-step.js +0 -347
  93. package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
  94. package/dist/pipeline/steps/fetch-docs-step.js +0 -84
  95. package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
  96. package/dist/pipeline/steps/generate-configs-step.js +0 -98
  97. package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
  98. package/dist/pipeline/steps/grader-consistency-step.js +0 -74
  99. package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
  100. package/dist/pipeline/steps/publish-report-step.js +0 -243
  101. package/dist/pipeline/steps/report-step.d.ts +0 -13
  102. package/dist/pipeline/steps/report-step.js +0 -56
  103. package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
  104. package/dist/pipeline/steps/update-scores-step.js +0 -42
  105. package/dist/scripts/agent-behavior-report.d.ts +0 -19
  106. package/dist/scripts/agent-behavior-report.js +0 -315
  107. package/dist/scripts/baseline.d.ts +0 -43
  108. package/dist/scripts/baseline.js +0 -267
  109. package/dist/scripts/calculate-scores.d.ts +0 -166
  110. package/dist/scripts/calculate-scores.js +0 -1296
  111. package/dist/scripts/compare.d.ts +0 -22
  112. package/dist/scripts/compare.js +0 -334
  113. package/dist/scripts/coverage-audit.d.ts +0 -44
  114. package/dist/scripts/coverage-audit.js +0 -209
  115. package/dist/scripts/debug-eval.d.ts +0 -19
  116. package/dist/scripts/debug-eval.js +0 -73
  117. package/dist/scripts/discovery-report.d.ts +0 -58
  118. package/dist/scripts/discovery-report.js +0 -250
  119. package/dist/scripts/fetch-docs.d.ts +0 -35
  120. package/dist/scripts/fetch-docs.js +0 -472
  121. package/dist/scripts/generate-configs.d.ts +0 -66
  122. package/dist/scripts/generate-configs.js +0 -459
  123. package/dist/scripts/grader-api.d.ts +0 -27
  124. package/dist/scripts/grader-api.js +0 -206
  125. package/dist/scripts/grader-compare.d.ts +0 -22
  126. package/dist/scripts/grader-compare.js +0 -368
  127. package/dist/scripts/grader-consistency.d.ts +0 -20
  128. package/dist/scripts/grader-consistency.js +0 -313
  129. package/dist/scripts/grader-sensitivity.d.ts +0 -22
  130. package/dist/scripts/grader-sensitivity.js +0 -354
  131. package/dist/scripts/grader-validate.d.ts +0 -19
  132. package/dist/scripts/grader-validate.js +0 -267
  133. package/dist/scripts/measure-retrieval.d.ts +0 -10
  134. package/dist/scripts/measure-retrieval.js +0 -145
  135. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +0 -24
  136. package/dist/scripts/migrate-tasks-to-content-lake.js +0 -328
  137. package/dist/scripts/pipeline.d.ts +0 -76
  138. package/dist/scripts/pipeline.js +0 -1031
  139. package/dist/scripts/pr-comment.d.ts +0 -10
  140. package/dist/scripts/pr-comment.js +0 -510
  141. package/dist/scripts/readiness-report.d.ts +0 -88
  142. package/dist/scripts/readiness-report.js +0 -342
  143. package/dist/scripts/update-quality-scores.d.ts +0 -15
  144. package/dist/scripts/update-quality-scores.js +0 -184
  145. package/dist/scripts/validate-task-sources.d.ts +0 -21
  146. package/dist/scripts/validate-task-sources.js +0 -210
  147. package/dist/scripts/validate.d.ts +0 -13
  148. package/dist/scripts/validate.js +0 -79
  149. package/dist/scripts/webhook-server.d.ts +0 -26
  150. package/dist/scripts/webhook-server.js +0 -147
  151. package/dist/scripts/weekly-digest.d.ts +0 -24
  152. package/dist/scripts/weekly-digest.js +0 -144
  153. package/dist/sinks/format-slack.d.ts +0 -64
  154. package/dist/sinks/format-slack.js +0 -306
  155. package/dist/sinks/slack-sink.d.ts +0 -27
  156. package/dist/sinks/slack-sink.js +0 -78
  157. package/dist/sinks/webhook-sink.d.ts +0 -19
  158. package/dist/sinks/webhook-sink.js +0 -50
  159. package/tasks/.expanded.agentic.yaml +0 -280
  160. package/tasks/.expanded.yaml +0 -565
@@ -1,379 +0,0 @@
1
- /**
2
- * LiteracyModeHandler — compilation rules for `literacy` mode.
3
- *
4
- * This handler replaces the existing `generate-configs.ts` + `expand-tasks.ts`
5
- * code path for literacy (documentation) evaluation. It compiles
6
- * LiteracyTaskDefinition objects into Promptfoo structure:
7
- *
8
- * - Gold entry (with-docs prompt, canonical docs injected)
9
- * - Baseline entry (without-docs prompt, empty docs)
10
- * - Rubric template resolution from config/rubrics
11
- * - Doc-coverage auto-generation when opted in
12
- * - Structured dimension metadata on rubric assertions
13
- *
14
- * The handler accepts GeneralizedTaskDefinition, narrows to
15
- * LiteracyTaskDefinition, and produces Promptfoo output.
16
- *
17
- * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
18
- * @see packages/eval/src/pipeline/expand-tasks.ts — the legacy code path
19
- */
20
- import { LiteracyVariant, } from "../../normalize-mode.js";
21
- // ---------------------------------------------------------------------------
22
- // Canonical literacy prompt templates
23
- // ---------------------------------------------------------------------------
24
- // These are the source-of-truth templates for literacy-mode evaluations.
25
- // Previously lived in config/prompts.ts as global templates; now handler-owned
26
- // so non-literacy modes can define their own prompts without collision.
27
/**
 * Prompt templates for literacy-mode evaluations, keyed by prompt id.
 *
 * Each entry carries its `id`, a human-readable `label`, the `template` text
 * with `{{placeholder}}` slots, and the list of `variables` the template uses.
 * Template text is assembled from explicit line arrays so that blank lines and
 * the trailing newline are visible in the source.
 */
export const LITERACY_PROMPT_TEMPLATES = {
    "with-docs": {
        id: "with-docs",
        label: "With Documentation",
        template: [
            "You are an expert Sanity.io developer. Use the following documentation to help implement the task.",
            "",
            "## Sanity Documentation",
            "{{docs}}",
            "",
            "## Task",
            "{{task}}",
            "",
            "## Requirements",
            "",
            "1. Use ONLY the APIs and patterns shown in the documentation",
            "2. Provide a complete, working implementation",
            "3. Include all necessary imports",
            "4. Follow Sanity best practices as documented",
            "",
            "Provide your implementation:",
            "", // trailing newline
        ].join("\n"),
        variables: ["docs", "task"],
    },
    "without-docs": {
        id: "without-docs",
        label: "Baseline (No Docs)",
        template: [
            "You are an expert Sanity.io developer.",
            "",
            "## Task",
            "{{task}}",
            "",
            "## Requirements",
            "",
            "1. Provide a complete, working implementation",
            "2. Include all necessary imports",
            "3. Follow Sanity best practices",
            "",
            "Provide your implementation:",
            "", // trailing newline
        ].join("\n"),
        variables: ["task"],
    },
    agentic: {
        id: "agentic",
        label: "Agentic (self-retrieval)",
        template: [
            "You are an expert developer helping implement a Sanity.io feature.",
            "You have access to web search and page fetching tools.",
            "",
            "IMPORTANT: Before writing any code, search for and read the relevant",
            "Sanity.io documentation to ensure you are using the latest APIs and",
            "best practices. Do not rely on memory alone.",
            "",
            "## Task",
            "{{task}}",
            "",
            "## Requirements",
            "",
            "1. Search for relevant Sanity documentation before implementing",
            "2. Use ONLY the APIs and patterns from the current official docs",
            "3. Provide a complete, working implementation",
            "4. Include all necessary imports",
            "5. Follow Sanity best practices as documented",
            "",
            "Provide your implementation:",
            "", // trailing newline
        ].join("\n"),
        variables: ["task"],
    },
};
94
/**
 * Check a literacy task definition for the fields compilation relies on.
 *
 * @param task - Task definition; its `id`, `title` and `prompt` are inspected.
 * @returns One `{ field, message }` entry per problem found, in the order
 *   id → title → prompt. An empty array means no problems were found.
 */
export function validateLiteracyTask(task) {
    const prompt = task.prompt;
    // The first non-nullish candidate wins; an empty string is reported as missing.
    const promptText = prompt?.text ?? prompt?.template ?? prompt?.vars?.task ?? "";
    const checks = [
        { field: "id", missing: !task.id, message: "Task ID is required" },
        { field: "title", missing: !task.title, message: "Task title is required" },
        { field: "prompt", missing: !promptText, message: "Task prompt text is required" },
    ];
    return checks
        .filter((check) => check.missing)
        .map(({ field, message }) => ({ field, message }));
}
119
- // ---------------------------------------------------------------------------
120
- // Compilation
121
- // ---------------------------------------------------------------------------
122
/**
 * Compile one literacy task into the pieces of a Promptfoo configuration.
 *
 * Validation problems do not abort compilation; each one is recorded as a
 * warning string and compilation continues.
 *
 * @param task - The literacy task definition to compile.
 * @param options - Optional settings. `evalMode` defaults to
 *   `LiteracyVariant.STANDARD`; the whole object is also forwarded to the
 *   provider and test-case builders.
 * @returns `{ providers, tests, prompts, warnings }`.
 */
export function compileLiteracyTask(task, options) {
    const evalMode = options?.evalMode ?? LiteracyVariant.STANDARD;
    // Seed the warning list with validation findings; the test-case builder
    // receives the same array and may append to it.
    const warnings = validateLiteracyTask(task).map(({ field, message }) => `Literacy task "${task.id}": ${field} — ${message}`);
    const providers = buildProviders(options);
    const prompts = buildPrompts(evalMode);
    const tests = buildTestCases(task, evalMode, options, warnings);
    return { providers, tests, prompts, warnings };
}
145
- // ---------------------------------------------------------------------------
146
- // Provider assembly
147
- // ---------------------------------------------------------------------------
148
/**
 * Turn the configured model list into provider entries.
 *
 * @param options - Optional compile options; only `models` is read.
 * @returns One `{ id, label, config }` object per model, or an empty array
 *   when no models are configured.
 */
function buildProviders(options) {
    const models = options?.models;
    if (!models || !(models.length > 0)) {
        return [];
    }
    // Copy only the three provider fields; anything else on the model is dropped.
    return models.map(({ id, label, config }) => ({ id, label, config }));
}
158
- // ---------------------------------------------------------------------------
159
- // Prompt assembly
160
- // ---------------------------------------------------------------------------
161
/**
 * Reshape a prompt template into the prompt entry used in compile results.
 *
 * @param pt - Template with `id`, `label` and `template` fields.
 * @returns `{ id, label, raw }`, where `raw` is the template text.
 */
function templateToPromptfoo(pt) {
    const { id, label, template } = pt;
    return { id, label, raw: template };
}
165
/**
 * Select the prompt entries for an evaluation variant.
 *
 * @param evalMode - `"agentic"` yields the single agentic prompt; any other
 *   value yields the with-docs and without-docs prompts, in that order.
 * @returns The selected templates converted via `templateToPromptfoo`.
 */
function buildPrompts(evalMode) {
    const promptIds = evalMode === "agentic" ? ["agentic"] : ["with-docs", "without-docs"];
    return promptIds.map((id) => templateToPromptfoo(LITERACY_PROMPT_TEMPLATES[id]));
}
175
- // ---------------------------------------------------------------------------
176
- // Test case assembly
177
- // ---------------------------------------------------------------------------
178
/**
 * Build the test cases for one literacy task: a "gold" case and, unless the
 * variant is agentic or the task disables it, a "baseline" case.
 *
 * @param task - Literacy task definition.
 * @param evalMode - Evaluation variant; see the inline notes for how it gates
 *   the prompt filter and the baseline case.
 * @param options - Compile options, forwarded to assertion resolution.
 * @param warnings - Mutable warning list, forwarded to assertion resolution.
 * @returns The test cases, gold first.
 */
function buildTestCases(task, evalMode, options, warnings) {
    const prompt = task.prompt;
    const promptText = prompt?.text ?? prompt?.template ?? "";
    const extraVars = prompt?.vars ?? {};
    const featureArea = task.area ?? "";
    const title = task.title;
    // The gold case points at a canonical context file only when the task lists docs.
    const canonicalDocs = (task.context?.docs ?? []).length > 0
        ? `file://contexts/canonical/${task.id}.md`
        : "";
    // Task-supplied vars are spread last, so they override the built-in keys.
    const makeVars = (docs) => ({
        task: promptText,
        docs,
        __featureArea: featureArea,
        ...extraVars,
    });
    const assertions = resolveAssertions(task, options, warnings);
    // Gold case: restricted to the with-docs prompt only in the standard variant.
    const goldPromptFilter = evalMode === LiteracyVariant.STANDARD ? { prompts: ["with-docs"] } : {};
    const goldAssert = assertions.length > 0 ? { assert: assertions } : {};
    const tests = [
        {
            description: `${title} (gold)`,
            vars: makeVars(canonicalDocs),
            ...goldPromptFilter,
            ...goldAssert,
        },
    ];
    // Baseline case (no docs) is skipped for the agentic variant and when the
    // task sets `baseline.enabled` to exactly `false`.
    if (evalMode === "agentic") {
        return tests;
    }
    if (task.baseline?.enabled === false) {
        return tests;
    }
    const baselineAssertions = buildBaselineAssertions(assertions, task.baseline?.rubric);
    const baselineAssert = baselineAssertions.length > 0 ? { assert: baselineAssertions } : {};
    tests.push({
        description: `${title} (baseline)`,
        vars: makeVars(""),
        prompts: ["without-docs"],
        ...baselineAssert,
    });
    return tests;
}
229
- // ---------------------------------------------------------------------------
230
- // Assertion resolution
231
- // ---------------------------------------------------------------------------
232
/**
 * Convert a task's assertion definitions into compiled assertions.
 *
 * `llm-rubric` entries that carry a `template` key are resolved through
 * `resolveTemplatedAssertion` and dropped when that returns nothing. Every
 * other entry is passed through with its `type`, optional `value`, numeric
 * `weight`, and, for `llm-rubric`, the grader provider when one is set.
 * When `task.docCoverage` is truthy, a doc-coverage assertion is appended if
 * one can be built.
 *
 * @param task - Task definition; `assertions` and `docCoverage` are read.
 * @param options - Optional; `rubricConfig` and `graderProvider` are read.
 * @param warnings - Mutable warning list passed to template resolution.
 * @returns The compiled assertions in definition order.
 */
function resolveAssertions(task, options, warnings) {
    const rubricConfig = options?.rubricConfig;
    const graderProvider = options?.graderProvider;
    const assertions = (task.assertions ?? []).flatMap((a) => {
        const isRubric = a.type === "llm-rubric";
        if (isRubric && "template" in a) {
            const resolved = resolveTemplatedAssertion(a, rubricConfig, graderProvider, warnings);
            return resolved ? [resolved] : [];
        }
        const valuePart = "value" in a ? { value: a.value } : {};
        const weightPart = typeof a.weight === "number" ? { weight: a.weight } : {};
        const providerPart = isRubric && graderProvider ? { provider: graderProvider } : {};
        return [{ type: a.type, ...valuePart, ...weightPart, ...providerPart }];
    });
    if (task.docCoverage) {
        const coverage = buildDocCoverageAssertion(rubricConfig, graderProvider);
        if (coverage) {
            assertions.push(coverage);
        }
    }
    return assertions;
}
263
- function resolveTemplatedAssertion(a, rubricConfig, graderProvider, warnings) {
264
- if (!rubricConfig) {
265
- warnings.push(`No rubric config — template "${a.template}" cannot be resolved`);
266
- return null;
267
- }
268
- const template = rubricConfig.templates[a.template];
269
- if (!template) {
270
- warnings.push(`Unknown rubric template: "${a.template}"`);
271
- return null;
272
- }
273
- // Assemble the rubric text
274
- const scaleText = template.scale.map((s) => `- ${s}`).join("\n");
275
- const criteriaText = a.criteria.map((c) => `- ${c}`).join("\n");
276
- // Match legacy rubric assembly format:
277
- // header\n- scale...\n\ncriteria_label\n- criteria...\n\nfooter
278
- const rubricValue = `${template.header}\n${scaleText}\n\n` +
279
- `${template.criteria_label ?? "Check for:"}\n${criteriaText}\n\n` +
280
- `Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}`;
281
- return {
282
- type: "llm-rubric",
283
- value: rubricValue,
284
- ...(graderProvider ? { provider: graderProvider } : {}),
285
- ...(template.dimension
286
- ? { metadata: { dimension: template.dimension, maxScore: 100 } }
287
- : {}),
288
- };
289
- }
290
/**
 * Build the auto-generated doc-coverage rubric assertion.
 *
 * Uses the `"doc-coverage"` entry of `rubricConfig.templates`. The rubric text
 * is the template header, its scale as bullets, a blank line, and the
 * JSON-output instruction.
 *
 * @param rubricConfig - Optional rubric configuration. A missing config, a
 *   config without a `templates` map, or a map without a `"doc-coverage"`
 *   entry all yield `null` rather than throwing.
 * @param graderProvider - Optional provider attached to the result.
 * @returns The compiled assertion, or `null` when no template is available.
 */
function buildDocCoverageAssertion(rubricConfig, graderProvider) {
    // Optional-chain through `templates` too: a config object without that
    // map must be treated as "no template", not as a crash.
    const template = rubricConfig?.templates?.["doc-coverage"];
    if (!template)
        return null;
    // A template without a usable scale list contributes no bullet lines.
    const scale = Array.isArray(template.scale) ? template.scale : [];
    const scaleText = scale.map((s) => `- ${s}`).join("\n");
    const rubricValue = `${template.header}\n${scaleText}\n\n` +
        `Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}`;
    return {
        type: "llm-rubric",
        value: rubricValue,
        ...(graderProvider ? { provider: graderProvider } : {}),
        ...(template.dimension
            ? { metadata: { dimension: template.dimension, maxScore: 100 } }
            : {}),
    };
}
306
- // ---------------------------------------------------------------------------
307
- // Baseline assertion filtering
308
- // ---------------------------------------------------------------------------
309
/**
 * Derive the baseline test's assertions from the gold test's assertions.
 *
 * @param goldAssertions - Assertions compiled for the gold entry.
 * @param rubricMode - Baseline rubric mode; defaults to `"full"`.
 *   - `"none"`: no assertions.
 *   - `"full"`: a shallow copy of all gold assertions.
 *   - anything else (abbreviated): at most one generic `llm-rubric`,
 *     inheriting the provider of the first gold `llm-rubric`; all other
 *     assertions are dropped.
 * @returns The assertions for the baseline entry.
 */
function buildBaselineAssertions(goldAssertions, rubricMode) {
    switch (rubricMode ?? "full") {
        case "none":
            return [];
        case "full":
            return [...goldAssertions];
        default: {
            // Only the first rubric is represented; it becomes a short summary prompt.
            const rubrics = goldAssertions.filter((a) => a.type === "llm-rubric");
            return rubrics.slice(0, 1).map((first) => ({
                type: "llm-rubric",
                value: "Score task completion from 0 to 100 (same criteria as above).\n" +
                    'Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}',
                ...(first.provider ? { provider: first.provider } : {}),
            }));
        }
    }
}
341
- // ---------------------------------------------------------------------------
342
- // ModeHandler adapter — wraps compileLiteracyTask for registry dispatch
343
- // ---------------------------------------------------------------------------
344
/**
 * Mode-handler object for the literacy evaluation mode.
 *
 * - `getPrompts()` returns the literacy prompt templates.
 * - `compileTask(task, ctx)` rejects any task whose `mode` is not
 *   `"literacy"`, then delegates to `compileLiteracyTask`, forwarding
 *   `graderProvider`, `rootDir`, `models`, `rubricConfig` and `evalMode`
 *   from the compilation context.
 */
export const handler = {
    getPrompts() {
        return LITERACY_PROMPT_TEMPLATES;
    },
    compileTask(task, ctx) {
        // Guard first: this handler only understands literacy tasks.
        const { mode } = task;
        if (mode !== "literacy") {
            throw new Error(`Literacy handler received task with mode "${mode}" — expected "literacy"`);
        }
        const { graderProvider, rootDir, models, rubricConfig, evalMode } = ctx;
        const { providers, tests, prompts, warnings } = compileLiteracyTask(task, {
            graderProvider,
            rootDir,
            models,
            rubricConfig,
            evalMode,
        });
        return { providers, tests, prompts, warnings };
    },
};
@@ -1,50 +0,0 @@
1
- /**
2
- * MCP-specific assertion types — ergonomic assertions for MCP server testing.
3
- *
4
- * Each assertion type compiles down to a Promptfoo `javascript` assertion
5
- * with the appropriate validation logic. The developer writes:
6
- *
7
- * ```typescript
8
- * assertions: [
9
- * { type: "tool-called", value: "getDocument" },
10
- * { type: "tool-input-matches", value: { documentId: "doc-123" } },
11
- * { type: "tool-output-matches", value: { title: "Hello" } },
12
- * { type: "error-returned", value: { code: -32602 } },
13
- * ]
14
- * ```
15
- *
16
- * The compiler transforms these into Promptfoo-compatible `javascript`
17
- * assertions that inspect the tool call trace in the evaluation output.
18
- *
19
- * @see docs/exec-plans/architecture-overhaul/phase-3-mcp-server-mode.md
20
- */
21
- import type { PromptfooAssertion } from "../assertion-mapper.js";
22
/**
 * Context supplied when building MCP assertions for a task.
 */
export interface MCPAssertionContext {
    /** Identifier of the task being compiled; used in error messages. */
    taskId: string;
    /** Capabilities the server under test is expected to expose. */
    capabilities: Array<string>;
    /** Provider to use for LLM-graded assertions, when one is configured. */
    graderProvider?: string;
}
31
/**
 * Assertion definition accepted by the MCP assertion builder.
 *
 * Deliberately loose so that both core and generalized assertion shapes fit:
 * only `type` is required, and unknown extra properties are permitted.
 */
interface AssertionInput {
    /** Assertion type name. */
    type: string;
    /** Expected value; its shape depends on the assertion type. */
    value?: unknown;
    /** Optional weight of this assertion. */
    weight?: number;
    /** Any additional properties carried by generalized assertions. */
    [key: string]: unknown;
}
39
/**
 * Compile task assertion definitions into Promptfoo assertions for MCP
 * server testing.
 *
 * MCP-specific types (tool-called, tool-input-matches, etc.) are handled by
 * the builder; standard types (contains, llm-rubric, etc.) are passed
 * through unchanged.
 *
 * @param assertions - Assertion definitions from the task.
 * @param context - Task id, expected capabilities and optional grader provider.
 * @returns The compiled assertions together with any warnings produced.
 */
export declare function buildMCPAssertions(assertions: Array<AssertionInput>, context: MCPAssertionContext): {
    assertions: Array<PromptfooAssertion>;
    warnings: Array<string>;
};
50
- export {};