@sanity/ailf 2.0.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. package/LICENSE +21 -0
  2. package/dist/_vendor/ailf-core/examples/index.d.ts +50 -1
  3. package/dist/_vendor/ailf-core/examples/index.js +66 -1
  4. package/dist/agent-harness/assertions-runtime.d.ts +49 -0
  5. package/dist/agent-harness/assertions-runtime.js +138 -0
  6. package/dist/agent-harness/provider.d.ts +58 -0
  7. package/dist/agent-harness/provider.js +104 -0
  8. package/dist/cli.js +0 -0
  9. package/dist/commands/init.js +3 -0
  10. package/dist/orchestration/steps/generate-configs-step.d.ts +7 -0
  11. package/dist/orchestration/steps/generate-configs-step.js +35 -2
  12. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +39 -25
  13. package/dist/pipeline/compiler/compiler-to-yaml.js +78 -7
  14. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +9 -0
  15. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +28 -85
  16. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +22 -15
  17. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +8 -1
  18. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +42 -12
  19. package/package.json +25 -24
  20. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +0 -10
  21. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +0 -185
  22. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +0 -6
  23. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +0 -42
  24. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  25. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  26. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  27. package/dist/_vendor/ailf-tasks/index.js +0 -16
  28. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  29. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  30. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  31. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  32. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  33. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  34. package/dist/adapters/task-sources/yaml-task-source.d.ts +0 -18
  35. package/dist/adapters/task-sources/yaml-task-source.js +0 -139
  36. package/dist/agent-observer/test-imports.d.ts +0 -7
  37. package/dist/agent-observer/test-imports.js +0 -185
  38. package/dist/commands/update-quality-scores.d.ts +0 -5
  39. package/dist/commands/update-quality-scores.js +0 -20
  40. package/dist/lib/agent-behavior-report.d.ts +0 -8
  41. package/dist/lib/agent-behavior-report.js +0 -185
  42. package/dist/lib/baseline.d.ts +0 -19
  43. package/dist/lib/baseline.js +0 -153
  44. package/dist/lib/calculate-scores.d.ts +0 -23
  45. package/dist/lib/calculate-scores.js +0 -42
  46. package/dist/lib/compare.d.ts +0 -18
  47. package/dist/lib/compare.js +0 -170
  48. package/dist/lib/coverage-audit.d.ts +0 -4
  49. package/dist/lib/coverage-audit.js +0 -42
  50. package/dist/lib/discovery-report.d.ts +0 -13
  51. package/dist/lib/discovery-report.js +0 -57
  52. package/dist/lib/fetch-docs.d.ts +0 -30
  53. package/dist/lib/fetch-docs.js +0 -171
  54. package/dist/lib/generate-configs.d.ts +0 -25
  55. package/dist/lib/generate-configs.js +0 -42
  56. package/dist/lib/grader-api.d.ts +0 -21
  57. package/dist/lib/grader-api.js +0 -34
  58. package/dist/lib/grader-compare.d.ts +0 -19
  59. package/dist/lib/grader-compare.js +0 -91
  60. package/dist/lib/grader-consistency.d.ts +0 -27
  61. package/dist/lib/grader-consistency.js +0 -79
  62. package/dist/lib/grader-sensitivity.d.ts +0 -19
  63. package/dist/lib/grader-sensitivity.js +0 -75
  64. package/dist/lib/grader-validate.d.ts +0 -19
  65. package/dist/lib/grader-validate.js +0 -78
  66. package/dist/lib/measure-retrieval.d.ts +0 -14
  67. package/dist/lib/measure-retrieval.js +0 -71
  68. package/dist/lib/pr-comment.d.ts +0 -16
  69. package/dist/lib/pr-comment.js +0 -28
  70. package/dist/lib/readiness-report.d.ts +0 -13
  71. package/dist/lib/readiness-report.js +0 -108
  72. package/dist/lib/webhook-server.d.ts +0 -11
  73. package/dist/lib/webhook-server.js +0 -24
  74. package/dist/lib/weekly-digest.d.ts +0 -24
  75. package/dist/lib/weekly-digest.js +0 -148
  76. package/dist/orchestration/env-bridge.d.ts +0 -21
  77. package/dist/orchestration/env-bridge.js +0 -66
  78. package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
  79. package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
  80. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +0 -9
  81. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +0 -339
  82. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +0 -70
  83. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
  84. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
  85. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
  86. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
  87. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
  88. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +0 -50
  89. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +0 -334
  90. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -69
  91. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -307
  92. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +0 -65
  93. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +0 -368
  94. package/dist/pipeline/compiler/task-bridge.d.ts +0 -41
  95. package/dist/pipeline/compiler/task-bridge.js +0 -92
  96. package/dist/pipeline/expand-tasks.d.ts +0 -232
  97. package/dist/pipeline/expand-tasks.js +0 -467
  98. package/dist/pipeline/generate-configs.d.ts +0 -92
  99. package/dist/pipeline/generate-configs.js +0 -445
  100. package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
  101. package/dist/pipeline/steps/calculate-scores-step.js +0 -89
  102. package/dist/pipeline/steps/compare-step.d.ts +0 -18
  103. package/dist/pipeline/steps/compare-step.js +0 -90
  104. package/dist/pipeline/steps/eval-step.d.ts +0 -53
  105. package/dist/pipeline/steps/eval-step.js +0 -347
  106. package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
  107. package/dist/pipeline/steps/fetch-docs-step.js +0 -84
  108. package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
  109. package/dist/pipeline/steps/generate-configs-step.js +0 -98
  110. package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
  111. package/dist/pipeline/steps/grader-consistency-step.js +0 -74
  112. package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
  113. package/dist/pipeline/steps/publish-report-step.js +0 -243
  114. package/dist/pipeline/steps/report-step.d.ts +0 -13
  115. package/dist/pipeline/steps/report-step.js +0 -56
  116. package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
  117. package/dist/pipeline/steps/update-scores-step.js +0 -42
  118. package/dist/scripts/agent-behavior-report.d.ts +0 -19
  119. package/dist/scripts/agent-behavior-report.js +0 -315
  120. package/dist/scripts/baseline.d.ts +0 -43
  121. package/dist/scripts/baseline.js +0 -267
  122. package/dist/scripts/calculate-scores.d.ts +0 -166
  123. package/dist/scripts/calculate-scores.js +0 -1296
  124. package/dist/scripts/compare.d.ts +0 -22
  125. package/dist/scripts/compare.js +0 -334
  126. package/dist/scripts/coverage-audit.d.ts +0 -44
  127. package/dist/scripts/coverage-audit.js +0 -209
  128. package/dist/scripts/debug-eval.d.ts +0 -19
  129. package/dist/scripts/debug-eval.js +0 -73
  130. package/dist/scripts/discovery-report.d.ts +0 -58
  131. package/dist/scripts/discovery-report.js +0 -250
  132. package/dist/scripts/fetch-docs.d.ts +0 -35
  133. package/dist/scripts/fetch-docs.js +0 -472
  134. package/dist/scripts/generate-configs.d.ts +0 -66
  135. package/dist/scripts/generate-configs.js +0 -459
  136. package/dist/scripts/grader-api.d.ts +0 -27
  137. package/dist/scripts/grader-api.js +0 -206
  138. package/dist/scripts/grader-compare.d.ts +0 -22
  139. package/dist/scripts/grader-compare.js +0 -368
  140. package/dist/scripts/grader-consistency.d.ts +0 -20
  141. package/dist/scripts/grader-consistency.js +0 -313
  142. package/dist/scripts/grader-sensitivity.d.ts +0 -22
  143. package/dist/scripts/grader-sensitivity.js +0 -354
  144. package/dist/scripts/grader-validate.d.ts +0 -19
  145. package/dist/scripts/grader-validate.js +0 -267
  146. package/dist/scripts/measure-retrieval.d.ts +0 -10
  147. package/dist/scripts/measure-retrieval.js +0 -145
  148. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +0 -24
  149. package/dist/scripts/migrate-tasks-to-content-lake.js +0 -328
  150. package/dist/scripts/pipeline.d.ts +0 -76
  151. package/dist/scripts/pipeline.js +0 -1031
  152. package/dist/scripts/pr-comment.d.ts +0 -10
  153. package/dist/scripts/pr-comment.js +0 -510
  154. package/dist/scripts/readiness-report.d.ts +0 -88
  155. package/dist/scripts/readiness-report.js +0 -342
  156. package/dist/scripts/update-quality-scores.d.ts +0 -15
  157. package/dist/scripts/update-quality-scores.js +0 -184
  158. package/dist/scripts/validate-task-sources.d.ts +0 -21
  159. package/dist/scripts/validate-task-sources.js +0 -210
  160. package/dist/scripts/validate.d.ts +0 -13
  161. package/dist/scripts/validate.js +0 -79
  162. package/dist/scripts/webhook-server.d.ts +0 -26
  163. package/dist/scripts/webhook-server.js +0 -147
  164. package/dist/scripts/weekly-digest.d.ts +0 -24
  165. package/dist/scripts/weekly-digest.js +0 -144
  166. package/dist/sinks/format-slack.d.ts +0 -64
  167. package/dist/sinks/format-slack.js +0 -306
  168. package/dist/sinks/slack-sink.d.ts +0 -27
  169. package/dist/sinks/slack-sink.js +0 -78
  170. package/dist/sinks/webhook-sink.d.ts +0 -19
  171. package/dist/sinks/webhook-sink.js +0 -50
  172. package/tasks/.expanded.agentic.yaml +0 -280
  173. package/tasks/.expanded.yaml +0 -565
@@ -1,245 +0,0 @@
1
- /**
2
- * KnowledgeProbeModeHandler — compilation rules for `knowledge-probe` mode.
3
- *
4
- * The simplest mode handler. Knowledge probes measure raw model knowledge
5
- * without documentation context, tool calling, or sandboxed execution.
6
- * They answer: "What does this model know about X without any help?"
7
- *
8
- * Key properties:
9
- * - No doc vars injected (intentionally empty)
10
- * - Uses the without-docs prompt template (or custom prompt)
11
- * - Standard LLM providers only (no agent SDKs, no MCP)
12
- * - No retrieval metrics (precision/recall/F1 not applicable)
13
- * - Results feed into the standard cross-model comparison pipeline
14
- *
15
- * This handler is the reference implementation for the mode handler pattern.
16
- *
17
- * @see docs/exec-plans/architecture-overhaul/phase-5-knowledge-probe.md
18
- * @see packages/core/src/types/generalized-task.ts — KnowledgeProbeTaskDefinition
19
- */
20
- // ---------------------------------------------------------------------------
21
- // Canonical knowledge probe prompt templates
22
- // ---------------------------------------------------------------------------
23
- // Handler-owned prompts for knowledge probe evaluations. These ask factual
24
- // questions without injecting documentation context — measuring raw model
25
- // knowledge about Sanity concepts.
26
- export const KNOWLEDGE_PROBE_PROMPT_TEMPLATES = {
27
- "knowledge-probe": {
28
- id: "knowledge-probe",
29
- label: "Knowledge Probe (No Docs)",
30
- template: `Answer the following question about Sanity.io based on your existing knowledge. Do not search for or reference external documentation.
31
-
32
- ## Question
33
- {{task}}
34
-
35
- ## Instructions
36
-
37
- 1. Answer based solely on what you already know
38
- 2. Be specific — include API names, function signatures, and code examples where relevant
39
- 3. If you are unsure about a detail, say so rather than guessing
40
- 4. Provide a complete, accurate answer
41
-
42
- Your answer:
43
- `,
44
- variables: ["task"],
45
- },
46
- };
47
- /**
48
- * Validate that a knowledge probe task definition has all required fields.
49
- */
50
- export function validateKnowledgeProbeTask(task) {
51
- const errors = [];
52
- if (!task.id) {
53
- errors.push({ field: "id", message: "Task ID is required" });
54
- }
55
- if (!task.title) {
56
- errors.push({ field: "title", message: "Task title is required" });
57
- }
58
- // Knowledge probes must have either a prompt or a description
59
- if (!task.prompt?.text && !task.prompt?.vars?.task && !task.description) {
60
- errors.push({
61
- field: "prompt",
62
- message: "Knowledge probe tasks require either prompt.text, prompt.vars.task, " +
63
- "or description — the question to ask the model",
64
- });
65
- }
66
- return errors;
67
- }
68
- // ---------------------------------------------------------------------------
69
- // Compilation
70
- // ---------------------------------------------------------------------------
71
- /**
72
- * Compile a knowledge probe task definition into Promptfoo configuration.
73
- *
74
- * This is intentionally minimal — knowledge probes map almost 1:1 to
75
- * basic Promptfoo test cases. The AILF value-add is type-safe authoring,
76
- * cross-model comparison, and score normalization.
77
- */
78
- export function compileKnowledgeProbeTask(task, options) {
79
- const warnings = [];
80
- // Validate
81
- const validationErrors = validateKnowledgeProbeTask(task);
82
- for (const err of validationErrors) {
83
- warnings.push(`Knowledge probe "${task.id}": ${err.field} — ${err.message}`);
84
- }
85
- // Build providers from model list (or use a default placeholder)
86
- const providers = buildProviders(options);
87
- // Build prompts — knowledge probes use a single no-docs prompt
88
- const prompts = buildPrompts(task);
89
- // Build test cases
90
- const tests = buildTestCases(task, options, warnings);
91
- // Build metadata
92
- const metadata = {
93
- mode: "knowledge-probe",
94
- probeStrategy: task.probeStrategy ?? "breadth-first",
95
- noDocContext: true,
96
- retrievalMetrics: false,
97
- };
98
- return { providers, tests, prompts, metadata, warnings };
99
- }
100
- // ---------------------------------------------------------------------------
101
- // Provider assembly
102
- // ---------------------------------------------------------------------------
103
- function buildProviders(options) {
104
- if (options?.models && options.models.length > 0) {
105
- return options.models.map((model) => ({
106
- id: model.id,
107
- label: model.label,
108
- config: model.config,
109
- }));
110
- }
111
- // No models specified — return empty (caller should provide models)
112
- return [];
113
- }
114
- // ---------------------------------------------------------------------------
115
- // Prompt assembly
116
- // ---------------------------------------------------------------------------
117
- function buildPrompts(task) {
118
- // Knowledge probes use a single prompt — no with-docs/without-docs split.
119
- // The prompt IS the probe question.
120
- const promptText = task.prompt?.text ??
121
- task.prompt?.vars?.task ??
122
- task.description ??
123
- `Knowledge probe: ${task.title}`;
124
- const systemMessage = task.prompt?.systemMessage;
125
- return [
126
- {
127
- id: "knowledge-probe",
128
- label: `Probe: ${task.title}`,
129
- raw: systemMessage
130
- ? `[system]\n${systemMessage}\n\n[user]\n${String(promptText)}`
131
- : String(promptText),
132
- },
133
- ];
134
- }
135
- // ---------------------------------------------------------------------------
136
- // Test case assembly
137
- // ---------------------------------------------------------------------------
138
- function buildTestCases(task, options, warnings) {
139
- // Build assertions
140
- const assertions = [];
141
- if (task.assertions) {
142
- for (const assertion of task.assertions) {
143
- const raw = assertion;
144
- const mapped = mapKnowledgeProbeAssertion(raw, options, warnings);
145
- if (mapped)
146
- assertions.push(mapped);
147
- }
148
- }
149
- // Build vars — intentionally no docs
150
- const vars = {
151
- task: task.prompt?.vars?.task ??
152
- task.description ??
153
- `Knowledge probe: ${task.title}`,
154
- ...(task.prompt?.vars ?? {}),
155
- // Metadata for scoring pipeline
156
- __mode: "knowledge-probe",
157
- __probeStrategy: task.probeStrategy ?? "breadth-first",
158
- };
159
- // Explicitly do NOT include docs
160
- // This is the defining characteristic of knowledge-probe mode
161
- delete vars.docs;
162
- return [
163
- {
164
- description: `${task.id} — ${task.title}`,
165
- vars,
166
- ...(assertions.length > 0 ? { assert: assertions } : {}),
167
- },
168
- ];
169
- }
170
- // ---------------------------------------------------------------------------
171
- // Assertion mapping
172
- // ---------------------------------------------------------------------------
173
- function mapKnowledgeProbeAssertion(assertion, options, warnings) {
174
- switch (assertion.type) {
175
- // Standard assertions — pass through
176
- case "contains":
177
- case "contains-all":
178
- case "contains-any":
179
- case "equals":
180
- case "is-json":
181
- case "javascript":
182
- case "python":
183
- case "regex":
184
- case "similar":
185
- return {
186
- type: assertion.type,
187
- ...("value" in assertion ? { value: assertion.value } : {}),
188
- ...(typeof assertion.weight === "number"
189
- ? { weight: assertion.weight }
190
- : {}),
191
- };
192
- // LLM-graded assertions — add grader provider
193
- case "g-eval":
194
- case "llm-rubric":
195
- case "model-graded-closedqa":
196
- case "model-graded-factuality":
197
- return {
198
- type: assertion.type,
199
- ...("value" in assertion ? { value: assertion.value } : {}),
200
- ...(typeof assertion.weight === "number"
201
- ? { weight: assertion.weight }
202
- : {}),
203
- ...(options?.graderProvider
204
- ? { provider: options.graderProvider }
205
- : {}),
206
- };
207
- // Tool-use assertions are NOT valid for knowledge probes
208
- case "skill-used":
209
- case "tool-call-f1":
210
- case "tool-called":
211
- case "tool-input-matches":
212
- case "tool-output-matches":
213
- warnings.push(`Knowledge probe "${assertion.type}" assertion is not applicable — ` +
214
- "knowledge probes don't use tools. Assertion skipped.");
215
- return null;
216
- default:
217
- warnings.push(`Knowledge probe: unknown assertion type "${assertion.type}" — passed through`);
218
- return {
219
- type: assertion.type,
220
- ...("value" in assertion ? { value: assertion.value } : {}),
221
- };
222
- }
223
- }
224
- // ---------------------------------------------------------------------------
225
- // ModeHandler adapter
226
- // ---------------------------------------------------------------------------
227
- /** ModeHandler-conformant export for the knowledge-probe evaluation mode. */
228
- export const handler = {
229
- getPrompts() {
230
- return KNOWLEDGE_PROBE_PROMPT_TEMPLATES;
231
- },
232
- compileTask(task, ctx) {
233
- if (!("mode" in task) || task.mode !== "knowledge-probe") {
234
- throw new Error(`Knowledge probe handler received task with mode "${task.mode ?? "undefined"}" — expected "knowledge-probe"`);
235
- }
236
- const result = compileKnowledgeProbeTask(task, { graderProvider: ctx.graderProvider, models: ctx.models });
237
- return {
238
- providers: result.providers,
239
- tests: result.tests,
240
- prompts: result.prompts,
241
- warnings: result.warnings,
242
- extras: { metadata: result.metadata },
243
- };
244
- },
245
- };
@@ -1,89 +0,0 @@
1
- /**
2
- * LiteracyModeHandler — compilation rules for `literacy` mode.
3
- *
4
- * This handler replaces the existing `generate-configs.ts` + `expand-tasks.ts`
5
- * code path for literacy (documentation) evaluation. It compiles
6
- * LiteracyTaskDefinition objects into Promptfoo structure:
7
- *
8
- * - Gold entry (with-docs prompt, canonical docs injected)
9
- * - Baseline entry (without-docs prompt, empty docs)
10
- * - Rubric template resolution from config/rubrics
11
- * - Doc-coverage auto-generation when opted in
12
- * - Structured dimension metadata on rubric assertions
13
- *
14
- * The handler accepts GeneralizedTaskDefinition, narrows to
15
- * LiteracyTaskDefinition, and produces Promptfoo output.
16
- *
17
- * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
18
- * @see packages/eval/src/pipeline/expand-tasks.ts — the legacy code path
19
- */
20
- import type { LiteracyTaskDefinition, ModeHandler, PromptTemplate } from "../../../_vendor/ailf-core/index.d.ts";
21
- import { type LiteracyEvalSubMode } from "../../normalize-mode.js";
22
- import type { PromptfooPrompt, PromptfooProvider, PromptfooTestCase } from "../promptfoo-compiler.js";
23
- export declare const LITERACY_PROMPT_TEMPLATES: Record<string, PromptTemplate>;
24
- /** Options for compiling a literacy task */
25
- export interface LiteracyCompileOptions {
26
- /** Grader provider for LLM-graded assertions */
27
- graderProvider?: string;
28
- /** Root directory (for resolving file:// doc paths) */
29
- rootDir?: string;
30
- /** Evaluation sub-mode — controls which entries are generated */
31
- evalMode?: LiteracyEvalSubMode;
32
- /** Model providers to include */
33
- models?: {
34
- id: string;
35
- label: string;
36
- config?: Record<string, unknown>;
37
- }[];
38
- /** Rubric config (templates, weights, profiles) — loaded from rubrics config */
39
- rubricConfig?: RubricConfig;
40
- }
41
- /** Minimal rubric config needed by the handler */
42
- export interface RubricConfig {
43
- templates: Record<string, {
44
- dimension?: string;
45
- header: string;
46
- scale: string[];
47
- criteria_label?: string;
48
- }>;
49
- }
50
- /** Result of compiling a single literacy task */
51
- export interface LiteracyCompileResult {
52
- /** Promptfoo provider configs */
53
- providers: PromptfooProvider[];
54
- /** Compiled test cases (gold + optional baseline) */
55
- tests: PromptfooTestCase[];
56
- /** Prompts for evaluation */
57
- prompts: PromptfooPrompt[];
58
- /** Warnings generated during compilation */
59
- warnings: string[];
60
- }
61
- export interface LiteracyValidationError {
62
- field: string;
63
- message: string;
64
- }
65
- /**
66
- * Validate a literacy task definition.
67
- */
68
- export declare function validateLiteracyTask(task: LiteracyTaskDefinition): LiteracyValidationError[];
69
- /**
70
- * Compile a literacy task into Promptfoo configuration.
71
- *
72
- * Produces the same structure as the legacy expand-tasks.ts path:
73
- * - Gold entry with with-docs prompt and canonical doc context
74
- * - Baseline entry with without-docs prompt and empty docs
75
- * - Rubric assertions with structured dimension metadata
76
- */
77
- export declare function compileLiteracyTask(task: LiteracyTaskDefinition, options?: LiteracyCompileOptions): LiteracyCompileResult;
78
- /**
79
- * ModeHandler-conformant export for the literacy evaluation mode.
80
- *
81
- * The pipeline looks up this handler via `registry.getMode("literacy")`
82
- * and calls `handler.compileTask()`. The handler narrows the union to
83
- * LiteracyTaskDefinition and delegates to `compileLiteracyTask()`.
84
- *
85
- * Note: The literacy handler's `evalMode` variant ("baseline" vs "agentic")
86
- * is passed via `ctx.evalMode` — a literacy-specific extension of
87
- * CompilationContext. The pipeline sets this when compiling literacy tasks.
88
- */
89
- export declare const handler: ModeHandler;