@sanity/ailf 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/LICENSE +21 -0
  2. package/dist/cli.js +0 -0
  3. package/dist/orchestration/steps/run-eval-step.js +1 -1
  4. package/dist/pipeline/checks.d.ts +8 -3
  5. package/dist/pipeline/checks.js +23 -3
  6. package/package.json +25 -25
  7. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +0 -10
  8. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +0 -185
  9. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +0 -6
  10. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +0 -42
  11. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  12. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  13. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  14. package/dist/_vendor/ailf-tasks/index.js +0 -16
  15. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  16. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  17. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  18. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  19. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  20. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  21. package/dist/adapters/task-sources/yaml-task-source.d.ts +0 -18
  22. package/dist/adapters/task-sources/yaml-task-source.js +0 -139
  23. package/dist/agent-observer/test-imports.d.ts +0 -7
  24. package/dist/agent-observer/test-imports.js +0 -185
  25. package/dist/commands/update-quality-scores.d.ts +0 -5
  26. package/dist/commands/update-quality-scores.js +0 -20
  27. package/dist/lib/agent-behavior-report.d.ts +0 -8
  28. package/dist/lib/agent-behavior-report.js +0 -185
  29. package/dist/lib/baseline.d.ts +0 -19
  30. package/dist/lib/baseline.js +0 -153
  31. package/dist/lib/calculate-scores.d.ts +0 -23
  32. package/dist/lib/calculate-scores.js +0 -42
  33. package/dist/lib/compare.d.ts +0 -18
  34. package/dist/lib/compare.js +0 -170
  35. package/dist/lib/coverage-audit.d.ts +0 -4
  36. package/dist/lib/coverage-audit.js +0 -42
  37. package/dist/lib/discovery-report.d.ts +0 -13
  38. package/dist/lib/discovery-report.js +0 -57
  39. package/dist/lib/fetch-docs.d.ts +0 -30
  40. package/dist/lib/fetch-docs.js +0 -171
  41. package/dist/lib/generate-configs.d.ts +0 -25
  42. package/dist/lib/generate-configs.js +0 -42
  43. package/dist/lib/grader-api.d.ts +0 -21
  44. package/dist/lib/grader-api.js +0 -34
  45. package/dist/lib/grader-compare.d.ts +0 -19
  46. package/dist/lib/grader-compare.js +0 -91
  47. package/dist/lib/grader-consistency.d.ts +0 -27
  48. package/dist/lib/grader-consistency.js +0 -79
  49. package/dist/lib/grader-sensitivity.d.ts +0 -19
  50. package/dist/lib/grader-sensitivity.js +0 -75
  51. package/dist/lib/grader-validate.d.ts +0 -19
  52. package/dist/lib/grader-validate.js +0 -78
  53. package/dist/lib/measure-retrieval.d.ts +0 -14
  54. package/dist/lib/measure-retrieval.js +0 -71
  55. package/dist/lib/pr-comment.d.ts +0 -16
  56. package/dist/lib/pr-comment.js +0 -28
  57. package/dist/lib/readiness-report.d.ts +0 -13
  58. package/dist/lib/readiness-report.js +0 -108
  59. package/dist/lib/webhook-server.d.ts +0 -11
  60. package/dist/lib/webhook-server.js +0 -24
  61. package/dist/lib/weekly-digest.d.ts +0 -24
  62. package/dist/lib/weekly-digest.js +0 -148
  63. package/dist/orchestration/env-bridge.d.ts +0 -21
  64. package/dist/orchestration/env-bridge.js +0 -66
  65. package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
  66. package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
  67. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +0 -9
  68. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +0 -339
  69. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +0 -70
  70. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
  71. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
  72. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
  73. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
  74. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
  75. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +0 -50
  76. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +0 -334
  77. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -69
  78. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -307
  79. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +0 -65
  80. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +0 -368
  81. package/dist/pipeline/compiler/task-bridge.d.ts +0 -41
  82. package/dist/pipeline/compiler/task-bridge.js +0 -92
  83. package/dist/pipeline/expand-tasks.d.ts +0 -232
  84. package/dist/pipeline/expand-tasks.js +0 -467
  85. package/dist/pipeline/generate-configs.d.ts +0 -92
  86. package/dist/pipeline/generate-configs.js +0 -445
  87. package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
  88. package/dist/pipeline/steps/calculate-scores-step.js +0 -89
  89. package/dist/pipeline/steps/compare-step.d.ts +0 -18
  90. package/dist/pipeline/steps/compare-step.js +0 -90
  91. package/dist/pipeline/steps/eval-step.d.ts +0 -53
  92. package/dist/pipeline/steps/eval-step.js +0 -347
  93. package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
  94. package/dist/pipeline/steps/fetch-docs-step.js +0 -84
  95. package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
  96. package/dist/pipeline/steps/generate-configs-step.js +0 -98
  97. package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
  98. package/dist/pipeline/steps/grader-consistency-step.js +0 -74
  99. package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
  100. package/dist/pipeline/steps/publish-report-step.js +0 -243
  101. package/dist/pipeline/steps/report-step.d.ts +0 -13
  102. package/dist/pipeline/steps/report-step.js +0 -56
  103. package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
  104. package/dist/pipeline/steps/update-scores-step.js +0 -42
  105. package/dist/scripts/agent-behavior-report.d.ts +0 -19
  106. package/dist/scripts/agent-behavior-report.js +0 -315
  107. package/dist/scripts/baseline.d.ts +0 -43
  108. package/dist/scripts/baseline.js +0 -267
  109. package/dist/scripts/calculate-scores.d.ts +0 -166
  110. package/dist/scripts/calculate-scores.js +0 -1296
  111. package/dist/scripts/compare.d.ts +0 -22
  112. package/dist/scripts/compare.js +0 -334
  113. package/dist/scripts/coverage-audit.d.ts +0 -44
  114. package/dist/scripts/coverage-audit.js +0 -209
  115. package/dist/scripts/debug-eval.d.ts +0 -19
  116. package/dist/scripts/debug-eval.js +0 -73
  117. package/dist/scripts/discovery-report.d.ts +0 -58
  118. package/dist/scripts/discovery-report.js +0 -250
  119. package/dist/scripts/fetch-docs.d.ts +0 -35
  120. package/dist/scripts/fetch-docs.js +0 -472
  121. package/dist/scripts/generate-configs.d.ts +0 -66
  122. package/dist/scripts/generate-configs.js +0 -459
  123. package/dist/scripts/grader-api.d.ts +0 -27
  124. package/dist/scripts/grader-api.js +0 -206
  125. package/dist/scripts/grader-compare.d.ts +0 -22
  126. package/dist/scripts/grader-compare.js +0 -368
  127. package/dist/scripts/grader-consistency.d.ts +0 -20
  128. package/dist/scripts/grader-consistency.js +0 -313
  129. package/dist/scripts/grader-sensitivity.d.ts +0 -22
  130. package/dist/scripts/grader-sensitivity.js +0 -354
  131. package/dist/scripts/grader-validate.d.ts +0 -19
  132. package/dist/scripts/grader-validate.js +0 -267
  133. package/dist/scripts/measure-retrieval.d.ts +0 -10
  134. package/dist/scripts/measure-retrieval.js +0 -145
  135. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +0 -24
  136. package/dist/scripts/migrate-tasks-to-content-lake.js +0 -328
  137. package/dist/scripts/pipeline.d.ts +0 -76
  138. package/dist/scripts/pipeline.js +0 -1031
  139. package/dist/scripts/pr-comment.d.ts +0 -10
  140. package/dist/scripts/pr-comment.js +0 -510
  141. package/dist/scripts/readiness-report.d.ts +0 -88
  142. package/dist/scripts/readiness-report.js +0 -342
  143. package/dist/scripts/update-quality-scores.d.ts +0 -15
  144. package/dist/scripts/update-quality-scores.js +0 -184
  145. package/dist/scripts/validate-task-sources.d.ts +0 -21
  146. package/dist/scripts/validate-task-sources.js +0 -210
  147. package/dist/scripts/validate.d.ts +0 -13
  148. package/dist/scripts/validate.js +0 -79
  149. package/dist/scripts/webhook-server.d.ts +0 -26
  150. package/dist/scripts/webhook-server.js +0 -147
  151. package/dist/scripts/weekly-digest.d.ts +0 -24
  152. package/dist/scripts/weekly-digest.js +0 -144
  153. package/dist/sinks/format-slack.d.ts +0 -64
  154. package/dist/sinks/format-slack.js +0 -306
  155. package/dist/sinks/slack-sink.d.ts +0 -27
  156. package/dist/sinks/slack-sink.js +0 -78
  157. package/dist/sinks/webhook-sink.d.ts +0 -19
  158. package/dist/sinks/webhook-sink.js +0 -50
  159. package/tasks/.expanded.agentic.yaml +0 -280
  160. package/tasks/.expanded.yaml +0 -565
@@ -1,472 +0,0 @@
1
- /**
2
- * Fetch-docs.ts
3
- *
4
- * Pulls documentation from the Sanity CMS and generates markdown context
5
- * files for use in Promptfoo evaluations. Always produces canonical contexts;
6
- * other outputs are opt-in:
7
- *
8
- * 1. Canonical contexts — one file per evaluation task, containing
9
- * only the manually-annotated "gold" documents for that task (always)
10
- * 2. Feature-area contexts — one file per GROQ feature area query
11
- * (opt-in via --include-feature-areas)
12
- * 3. Full corpus — all articles in one file
13
- * (opt-in via --include-corpus)
14
- */
15
- // oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
16
- import "dotenv/config";
17
- import { mkdirSync, writeFileSync } from "fs";
18
- import { dirname, join } from "path";
19
- import { fetchUrlContent, } from "../pipeline/fetch-url-content.js";
20
- import { resolveMappings, } from "../pipeline/resolve-mappings.js";
21
- import { createPerspectiveClient, createPublishedClient, getSanityClient, } from "../sanity/client.js";
22
- import { ALL_ARTICLES_QUERY, ALL_FEATURE_AREAS, ARTICLES_METADATA_BY_SLUGS_QUERY, ARTICLE_BY_ID_QUERY, ARTICLE_BY_SLUG_QUERY, ARTICLE_BY_SLUG_WITH_PERSPECTIVE_QUERY, FEATURE_AREA_QUERIES, } from "../sanity/queries.js";
23
- import { toMarkdown } from "../sanity/portable-text.js";
24
- import { loadSource } from "../sources.js";
25
- // ---------------------------------------------------------------------------
26
- // Helpers
27
- // ---------------------------------------------------------------------------
28
- const ROOT = join(dirname(new URL(import.meta.url).pathname), "..", "..");
29
- /**
30
- * Escape `{{` and `}}` so Promptfoo's Nunjucks template engine doesn't
31
- * try to interpret them as template variables (e.g. Vue `{{ post.title }}`
32
- * in Nuxt docs would break Nunjucks parsing).
33
- *
34
- * Uses a single-pass regex so replacement text isn't re-scanned.
35
- */
36
- function escapeNunjucks(text) {
37
- return text.replace(/\{\{|\}\}/g, (match) => match === "{{" ? '{{ "{{" }}' : '{{ "}}" }}');
38
- }
39
- function estimateTokens(text) {
40
- return Math.ceil(text.length / 4);
41
- }
42
- // ---------------------------------------------------------------------------
43
- // Perspective diffing
44
- // ---------------------------------------------------------------------------
45
- /**
46
- * Fetch a single article by its document ID.
47
- *
48
- * When a perspective is active, the document is fetched through the
49
- * perspective client — this is the natural behavior for Studio document
50
- * URLs that include a ?perspective= parameter.
51
- */
52
- async function fetchArticleById(id, source) {
53
- const client = source.perspective
54
- ? createPerspectiveClient(source.perspective, source)
55
- : getSanityClient();
56
- return client.fetch(ARTICLE_BY_ID_QUERY, { id });
57
- }
58
- async function fetchArticleBySlug(slug) {
59
- const client = getSanityClient();
60
- const doc = await client.fetch(ARTICLE_BY_SLUG_QUERY, {
61
- slug,
62
- });
63
- if (!doc) {
64
- console.warn(` [warn] No article found for slug "${slug}"`);
65
- return "";
66
- }
67
- return formatArticle(doc);
68
- }
69
- /**
70
- * Fetch a single article by slug through a perspective-enabled client.
71
- */
72
- async function fetchArticleBySlugWithPerspective(slug, source) {
73
- if (!source.perspective) {
74
- return fetchArticleBySlug(slug);
75
- }
76
- const client = createPerspectiveClient(source.perspective, source);
77
- const doc = await client.fetch(ARTICLE_BY_SLUG_WITH_PERSPECTIVE_QUERY, { slug });
78
- if (!doc) {
79
- console.warn(` [warn] No article found for slug "${slug}" in perspective "${source.perspective}"`);
80
- return "";
81
- }
82
- return formatArticle(doc);
83
- }
84
- // ---------------------------------------------------------------------------
85
- // Document ID overlay
86
- // ---------------------------------------------------------------------------
87
- function formatArticle(doc) {
88
- const sectionLabel = doc.section ? `Section: ${doc.section.title}\n` : "";
89
- const desc = doc.description ? `${doc.description}\n\n` : "";
90
- // Convert Portable Text to Markdown
91
- const ptBlocks = doc.content ?? [];
92
- const markdown = toMarkdown(ptBlocks);
93
- return `## ${doc.title}\n\n${sectionLabel}${desc}${markdown}`;
94
- }
95
- async function generateCanonicalContexts(source) {
96
- const mappings = resolveMappings(ROOT);
97
- const canonicalDir = join(ROOT, "contexts", "canonical");
98
- mkdirSync(canonicalDir, { recursive: true });
99
- const hasPerspective = Boolean(source.perspective);
100
- const hasDocumentIds = source.documentIds !== undefined && source.documentIds.length > 0;
101
- const hasDirectUrls = source.urls.length > 0;
102
- // Build descriptive header
103
- const parts = [];
104
- if (hasPerspective) {
105
- parts.push(`perspective "${source.perspective}"`);
106
- }
107
- if (hasDocumentIds) {
108
- parts.push(`${source.documentIds.length} document overlay(s)`);
109
- }
110
- if (hasDirectUrls) {
111
- parts.push(`${source.urls.length} direct URL(s)`);
112
- }
113
- if (parts.length > 0) {
114
- console.log(`\nGenerating canonical contexts with ${parts.join(" + ")}...\n`);
115
- }
116
- else {
117
- console.log("\nGenerating canonical (gold-retrieval) contexts...\n");
118
- }
119
- // Collect all canonical slugs across all tasks for a single diff pass
120
- const allSlugs = new Set();
121
- for (const areaData of Object.values(mappings.feature_areas)) {
122
- for (const task of areaData.tasks) {
123
- for (const doc of task.canonical_docs) {
124
- allSlugs.add(doc.slug);
125
- }
126
- }
127
- }
128
- // Fetch metadata for all canonical docs and write the document manifest.
129
- // This captures _id, _rev, slug, title at evaluation time for traceability.
130
- const metadataClient = hasPerspective
131
- ? createPerspectiveClient(source.perspective, source)
132
- : getSanityClient();
133
- const allMetadata = await metadataClient.fetch(ARTICLES_METADATA_BY_SLUGS_QUERY, { slugs: [...allSlugs] });
134
- const manifestPath = join(ROOT, "contexts", "document-manifest.json");
135
- const manifest = allMetadata
136
- .map((m) => ({ _id: m._id, _rev: m._rev, slug: m.slug, title: m.title }))
137
- .sort((a, b) => a.slug.localeCompare(b.slug));
138
- writeFileSync(manifestPath, JSON.stringify(manifest, null, 2));
139
- console.log(` 📋 Document manifest: ${manifest.length} docs → contexts/document-manifest.json\n`);
140
- // If a perspective is active, diff all canonical slugs at once
141
- let releaseImpact;
142
- if (hasPerspective) {
143
- releaseImpact = await identifyAffectedDocs(source, [...allSlugs]);
144
- const affected = releaseImpact.added.length +
145
- releaseImpact.modified.length +
146
- releaseImpact.removed.length;
147
- console.log(` Perspective diff: ${affected} of ${allSlugs.size} canonical docs affected`);
148
- if (releaseImpact.added.length > 0) {
149
- console.log(` Added: ${releaseImpact.added.join(", ")}`);
150
- }
151
- if (releaseImpact.modified.length > 0) {
152
- console.log(` Modified: ${releaseImpact.modified.join(", ")}`);
153
- }
154
- if (releaseImpact.removed.length > 0) {
155
- console.log(` Removed: ${releaseImpact.removed.join(", ")}`);
156
- }
157
- console.log();
158
- }
159
- // If document IDs are specified, resolve them against the canonical set
160
- let documentOverlay;
161
- if (hasDocumentIds) {
162
- console.log(" Resolving " +
163
- source.documentIds.length +
164
- " document ID(s) against canonical set...");
165
- documentOverlay = await resolveDocumentOverlay(source.documentIds, allSlugs, source);
166
- console.log(" Document overlay: " +
167
- documentOverlay.replacements.size +
168
- " replacement(s), " +
169
- documentOverlay.appendedContent.length +
170
- " appended");
171
- console.log();
172
- }
173
- // If direct URLs are specified, fetch their content
174
- const urlContent = [];
175
- const urlFetchMetadata = [];
176
- if (source.urls.length > 0) {
177
- console.log(` Fetching ${source.urls.length} direct URL(s)...`);
178
- for (const url of source.urls) {
179
- const result = await fetchUrlContent(url, source.headers);
180
- urlFetchMetadata.push({
181
- contentLength: result.content?.length,
182
- error: result.error,
183
- method: result.method,
184
- status: result.status,
185
- url: result.url,
186
- });
187
- if (result.content) {
188
- urlContent.push(result.content);
189
- const tokens = estimateTokens(result.content);
190
- console.log(` ✅ ${url} (via ${result.method}, ~${tokens} tokens)`);
191
- }
192
- else {
193
- console.warn(` ⚠️ ${url}: ${result.error}`);
194
- }
195
- }
196
- console.log();
197
- }
198
- const affectedSlugs = new Set([
199
- ...(releaseImpact?.added ?? []),
200
- ...(releaseImpact?.modified ?? []),
201
- ]);
202
- const removedSlugs = new Set(releaseImpact?.removed ?? []);
203
- for (const [area, areaData] of Object.entries(mappings.feature_areas)) {
204
- console.log(` Feature area: ${area}`);
205
- for (const task of areaData.tasks) {
206
- console.log(` Task: ${task.id}`);
207
- const docContents = [];
208
- for (const doc of task.canonical_docs) {
209
- if (removedSlugs.has(doc.slug)) {
210
- console.log(` ⚠️ Skipping: ${doc.slug} (removed in perspective)`);
211
- continue;
212
- }
213
- // Check if this slug has a document overlay replacement
214
- if (documentOverlay?.replacements.has(doc.slug)) {
215
- console.log(` Fetching: ${doc.slug} (from document overlay)`);
216
- docContents.push(documentOverlay.replacements.get(doc.slug));
217
- }
218
- else if (affectedSlugs.has(doc.slug)) {
219
- console.log(` Fetching: ${doc.slug} (from perspective)`);
220
- const content = await fetchArticleBySlugWithPerspective(doc.slug, source);
221
- if (content) {
222
- docContents.push(content);
223
- }
224
- }
225
- else {
226
- console.log(` Fetching: ${doc.slug}`);
227
- const content = await fetchArticleBySlug(doc.slug);
228
- if (content) {
229
- docContents.push(content);
230
- }
231
- }
232
- }
233
- // Append any extra documents from the overlay that didn't match
234
- // Canonical slugs — these are added to every task's context
235
- if (documentOverlay && documentOverlay.appendedContent.length > 0) {
236
- docContents.push(...documentOverlay.appendedContent);
237
- console.log(` + ${documentOverlay.appendedContent.length} appended document(s) from overlay`);
238
- }
239
- // Append URL-fetched content (from --url classified as direct-url)
240
- if (urlContent.length > 0) {
241
- docContents.push(...urlContent);
242
- console.log(` + ${urlContent.length} URL-fetched document(s)`);
243
- }
244
- const combined = docContents.join("\n\n---\n\n");
245
- const outPath = join(canonicalDir, `${task.id}.md`);
246
- writeFileSync(outPath, escapeNunjucks(combined));
247
- console.log(` -> ${task.id}.md: ~${estimateTokens(combined)} tokens`);
248
- }
249
- }
250
- // Write release impact file for downstream consumption (scoring, reports)
251
- if (releaseImpact) {
252
- const impactPath = join(ROOT, "contexts", "release-impact.json");
253
- writeFileSync(impactPath, JSON.stringify(releaseImpact, null, 2));
254
- console.log(`\n 📄 Release impact written to contexts/release-impact.json`);
255
- }
256
- // Write document overlay metadata for downstream consumption
257
- if (documentOverlay) {
258
- const overlayMeta = {
259
- appendedCount: documentOverlay.appendedContent.length,
260
- documentIds: source.documentIds,
261
- replacedSlugs: [...documentOverlay.replacements.keys()],
262
- };
263
- const overlayPath = join(ROOT, "contexts", "document-overlay.json");
264
- writeFileSync(overlayPath, JSON.stringify(overlayMeta, null, 2));
265
- console.log("\n 📄 Document overlay written to contexts/document-overlay.json");
266
- }
267
- // Write URL fetch metadata for downstream consumption
268
- if (urlFetchMetadata.length > 0) {
269
- const fetched = urlFetchMetadata.filter((m) => m.method !== "failed");
270
- const failures = urlFetchMetadata.filter((m) => m.method === "failed");
271
- const meta = {
272
- failures: failures.map((f) => ({ error: f.error, url: f.url })),
273
- fetchedUrls: fetched,
274
- totalFailed: failures.length,
275
- totalFetched: fetched.length,
276
- };
277
- const urlFetchPath = join(ROOT, "contexts", "url-fetch.json");
278
- writeFileSync(urlFetchPath, JSON.stringify(meta, null, 2));
279
- console.log("\n 📄 URL fetch metadata written to contexts/url-fetch.json");
280
- }
281
- }
282
- // ---------------------------------------------------------------------------
283
- // Feature-area contexts
284
- // ---------------------------------------------------------------------------
285
- async function generateFeatureAreaContexts(source) {
286
- const client = getSanityClient(undefined, source);
287
- const contextsDir = join(ROOT, "contexts");
288
- mkdirSync(contextsDir, { recursive: true });
289
- console.log("Generating feature-area contexts...\n");
290
- for (const feature of ALL_FEATURE_AREAS) {
291
- const query = FEATURE_AREA_QUERIES[feature];
292
- const docs = await client.fetch(query);
293
- if (docs.length === 0) {
294
- console.warn(` [warn] No articles found for "${feature}"`);
295
- continue;
296
- }
297
- const combined = docs.map(formatArticle).join("\n\n---\n\n");
298
- const outPath = join(contextsDir, `${feature}.md`);
299
- writeFileSync(outPath, escapeNunjucks(combined));
300
- console.log(` ${feature}: ${docs.length} articles, ~${estimateTokens(combined)} tokens`);
301
- }
302
- }
303
- // ---------------------------------------------------------------------------
304
- // Canonical (gold-retrieval) contexts
305
- // ---------------------------------------------------------------------------
306
- async function generateFullCorpus(source) {
307
- const client = getSanityClient(undefined, source);
308
- console.log("\nGenerating full corpus...");
309
- const docs = await client.fetch(ALL_ARTICLES_QUERY);
310
- const corpus = docs
311
- .map((d) => {
312
- const ptBlocks = d.content ?? [];
313
- const markdown = toMarkdown(ptBlocks);
314
- return (`## ${d.title}\n\n` +
315
- // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string title should fall back to "General"
316
- `Section: ${d.section?.title || "General"}\n` +
317
- `URL: ${source.baseUrl}/${d.slug}\n\n` +
318
- markdown);
319
- })
320
- .join("\n\n---\n\n");
321
- const outDir = join(ROOT, "contexts");
322
- mkdirSync(outDir, { recursive: true });
323
- writeFileSync(join(outDir, "full-corpus.md"), escapeNunjucks(corpus));
324
- console.log(` full-corpus.md: ${docs.length} articles, ~${estimateTokens(corpus)} tokens`);
325
- }
326
- /**
327
- * Compare canonical documents between the published dataset and a perspective
328
- * to identify which docs have been added, modified, or removed.
329
- *
330
- * The comparison uses _rev to detect modifications — if the _rev differs
331
- * between published and perspective, the document has been modified.
332
- */
333
- async function identifyAffectedDocs(source, canonicalSlugs) {
334
- if (!source.perspective) {
335
- return {
336
- added: [],
337
- modified: [],
338
- removed: [],
339
- unchanged: [...canonicalSlugs],
340
- };
341
- }
342
- const publishedClient = createPublishedClient(source);
343
- const perspectiveClient = createPerspectiveClient(source.perspective, source);
344
- const [publishedMeta, perspectiveMeta] = await Promise.all([
345
- publishedClient.fetch(ARTICLES_METADATA_BY_SLUGS_QUERY, {
346
- slugs: canonicalSlugs,
347
- }),
348
- perspectiveClient.fetch(ARTICLES_METADATA_BY_SLUGS_QUERY, { slugs: canonicalSlugs }),
349
- ]);
350
- const publishedMap = new Map(publishedMeta.map((d) => [d.slug, d]));
351
- const perspectiveMap = new Map(perspectiveMeta.map((d) => [d.slug, d]));
352
- const added = [];
353
- const modified = [];
354
- const removed = [];
355
- const unchanged = [];
356
- for (const slug of canonicalSlugs) {
357
- const pub = publishedMap.get(slug);
358
- const persp = perspectiveMap.get(slug);
359
- if (!pub && persp) {
360
- added.push(slug);
361
- }
362
- else if (pub && !persp) {
363
- removed.push(slug);
364
- }
365
- else if (pub && persp && pub._rev !== persp._rev) {
366
- modified.push(slug);
367
- }
368
- else {
369
- unchanged.push(slug);
370
- }
371
- }
372
- return { added, modified, removed, unchanged };
373
- }
374
- // ---------------------------------------------------------------------------
375
- // Full corpus (optional — useful for retrieval experiments)
376
- // ---------------------------------------------------------------------------
377
- async function main() {
378
- console.log("=== ai-literacy-framework — Documentation Fetcher ===\n");
379
- const args = process.argv.slice(2);
380
- const includeFeatureAreas = args.includes("--include-feature-areas");
381
- const includeCorpus = args.includes("--include-corpus");
382
- // Parse --source <name> argument
383
- const sourceIdx = args.indexOf("--source");
384
- const sourceName = sourceIdx !== -1 ? args[sourceIdx + 1] : undefined;
385
- const source = loadSource(sourceName);
386
- console.log(` Source: ${sourceName ?? "default (production)"}`);
387
- console.log(` Base URL: ${source.baseUrl}`);
388
- if (source.dataset) {
389
- console.log(` Dataset: ${source.dataset}`);
390
- }
391
- if (source.perspective) {
392
- console.log(` Perspective: ${source.perspective}`);
393
- }
394
- if (source.documentIds && source.documentIds.length > 0) {
395
- console.log(` Documents: ${source.documentIds.length} document ID(s)`);
396
- for (const id of source.documentIds) {
397
- console.log(` ${id}`);
398
- }
399
- }
400
- if (source.urls.length > 0) {
401
- console.log(` URLs: ${source.urls.length} direct URL(s)`);
402
- for (const u of source.urls) {
403
- console.log(` ${u}`);
404
- }
405
- }
406
- console.log();
407
- if (includeFeatureAreas) {
408
- await generateFeatureAreaContexts(source);
409
- }
410
- await generateCanonicalContexts(source);
411
- if (includeCorpus) {
412
- await generateFullCorpus(source);
413
- }
414
- console.log("\nDone!");
415
- }
416
- // ---------------------------------------------------------------------------
417
- // Main
418
- // ---------------------------------------------------------------------------
419
- /**
420
- * Resolve document IDs into a DocumentOverlay that describes how they
421
- * interact with the canonical doc set.
422
- *
423
- * For each document ID:
424
- * 1. Fetch the document from Sanity (through perspective if active)
425
- * 2. Check if the fetched document's slug matches a canonical slug
426
- * 3. If it matches -> add to replacements (replaces the canonical fetch)
427
- * 4. If it doesn't match -> add to appendedContent (extra context)
428
- *
429
- * @param documentIds - Array of Sanity document IDs to resolve
430
- * @param canonicalSlugs - Set of all canonical doc slugs across all tasks
431
- * @param source - The resolved documentation source configuration
432
- */
433
- async function resolveDocumentOverlay(documentIds, canonicalSlugs, source) {
434
- const overlay = {
435
- appendedContent: [],
436
- replacements: new Map(),
437
- };
438
- if (documentIds.length === 0) {
439
- return overlay;
440
- }
441
- // Fetch all documents in parallel
442
- const results = await Promise.all(documentIds.map(async (id) => {
443
- const doc = await fetchArticleById(id, source);
444
- return { doc, id };
445
- }));
446
- for (const { doc, id } of results) {
447
- if (!doc) {
448
- console.warn(' [warn] No article found for document ID "' + id + '"');
449
- continue;
450
- }
451
- const content = formatArticle(doc);
452
- if (!content) {
453
- continue;
454
- }
455
- if (doc.slug && canonicalSlugs.has(doc.slug)) {
456
- // This document matches a canonical slug — replace it
457
- overlay.replacements.set(doc.slug, content);
458
- console.log(" 📄 Document " + id + ' → replaces canonical doc "' + doc.slug + '"');
459
- }
460
- else {
461
- // This document doesn't match any canonical slug — append it
462
- overlay.appendedContent.push(content);
463
- const slugInfo = doc.slug ? ' (slug: "' + doc.slug + '")' : "";
464
- console.log(" 📄 Document " + id + " → appended as additional context" + slugInfo);
465
- }
466
- }
467
- return overlay;
468
- }
469
- main().catch((err) => {
470
- console.error("Fatal error:", err);
471
- process.exit(1);
472
- });
@@ -1,66 +0,0 @@
1
- /**
2
- * Generate-configs.ts
3
- *
4
- * Reads config/models.yaml (the central model registry) and generates all
5
- * promptfoo config files with the correct provider entries.
6
- *
7
- * This keeps model definitions in one place — add a model to config/models.yaml
8
- * and run `pnpm generate-configs` to propagate it to all eval modes.
9
- *
10
- * Generated configs:
11
- * - promptfooconfig.yaml (baseline: with-docs vs without-docs)
12
- * - promptfooconfig.observed.yaml (instrumented HTTP recording)
13
- * - promptfooconfig.agentic.yaml (agentic tool-calling: naive vs optimized)
14
- *
15
- * Usage:
16
- * pnpm generate-configs
17
- * # or
18
- * tsx src/scripts/generate-configs.ts
19
- */
20
- import type { ModelEntry } from "../pipeline/types.js";
21
- /** Auto-discover all task YAML files in the tasks/ directory. */
22
- export declare function discoverTaskFiles(rootDir: string): string[];
23
- interface LoadedPrompts {
24
- agentic: {
25
- id: string;
26
- label: string;
27
- raw: string;
28
- };
29
- withDocs: {
30
- id: string;
31
- label: string;
32
- raw: string;
33
- };
34
- withoutDocs: {
35
- id: string;
36
- label: string;
37
- raw: string;
38
- };
39
- }
40
- /**
41
- * Extract the raw API model name from a promptfoo provider ID.
42
- *
43
- * Promptfoo IDs encode the provider + sub-protocol + model, e.g.:
44
- * "openai:chat:gpt-5.2" → "gpt-5.2"
45
- * "anthropic:messages:claude-opus-4-6" → "claude-opus-4-6"
46
- * "openai:gpt-4o" → "gpt-4o"
47
- * "google:gemini-2.5-pro" → "gemini-2.5-pro"
48
- *
49
- * Falls back to stripping everything before the first colon for unknown
50
- * providers (e.g., "openrouter:deepseek/deepseek-r1" → "deepseek/deepseek-r1").
51
- */
52
- export declare function extractModelName(id: string): string;
53
- /**
54
- * Extract the LLM provider family from a promptfoo provider ID.
55
- *
56
- * "openai:chat:gpt-5.2" → "openai"
57
- * "anthropic:messages:claude-opus-4-6" → "anthropic"
58
- * "google:gemini-2.5-pro" → "google"
59
- */
60
- export declare function extractProvider(id: string): string;
61
- /** Load prompt templates from config/prompts.yaml. Throws if missing or malformed. */
62
- export declare function loadPrompts(rootDir: string): LoadedPrompts;
63
- /** Merge default config with model-specific config */
64
- export declare function mergeConfig(defaults: Record<string, unknown>, modelConfig?: Record<string, unknown>, overrides?: Record<string, unknown>): Record<string, unknown>;
65
- export declare function modelMatchesMode(model: ModelEntry, mode: string): boolean;
66
- export {};