@sanity/ailf 2.0.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +50 -1
- package/dist/_vendor/ailf-core/examples/index.js +66 -1
- package/dist/agent-harness/assertions-runtime.d.ts +49 -0
- package/dist/agent-harness/assertions-runtime.js +138 -0
- package/dist/agent-harness/provider.d.ts +58 -0
- package/dist/agent-harness/provider.js +104 -0
- package/dist/cli.js +0 -0
- package/dist/commands/init.js +3 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +7 -0
- package/dist/orchestration/steps/generate-configs-step.js +35 -2
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +39 -25
- package/dist/pipeline/compiler/compiler-to-yaml.js +78 -7
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +28 -85
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +22 -15
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +8 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +42 -12
- package/package.json +25 -24
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +0 -10
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +0 -185
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +0 -6
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +0 -42
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/adapters/task-sources/yaml-task-source.d.ts +0 -18
- package/dist/adapters/task-sources/yaml-task-source.js +0 -139
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
- package/dist/commands/update-quality-scores.d.ts +0 -5
- package/dist/commands/update-quality-scores.js +0 -20
- package/dist/lib/agent-behavior-report.d.ts +0 -8
- package/dist/lib/agent-behavior-report.js +0 -185
- package/dist/lib/baseline.d.ts +0 -19
- package/dist/lib/baseline.js +0 -153
- package/dist/lib/calculate-scores.d.ts +0 -23
- package/dist/lib/calculate-scores.js +0 -42
- package/dist/lib/compare.d.ts +0 -18
- package/dist/lib/compare.js +0 -170
- package/dist/lib/coverage-audit.d.ts +0 -4
- package/dist/lib/coverage-audit.js +0 -42
- package/dist/lib/discovery-report.d.ts +0 -13
- package/dist/lib/discovery-report.js +0 -57
- package/dist/lib/fetch-docs.d.ts +0 -30
- package/dist/lib/fetch-docs.js +0 -171
- package/dist/lib/generate-configs.d.ts +0 -25
- package/dist/lib/generate-configs.js +0 -42
- package/dist/lib/grader-api.d.ts +0 -21
- package/dist/lib/grader-api.js +0 -34
- package/dist/lib/grader-compare.d.ts +0 -19
- package/dist/lib/grader-compare.js +0 -91
- package/dist/lib/grader-consistency.d.ts +0 -27
- package/dist/lib/grader-consistency.js +0 -79
- package/dist/lib/grader-sensitivity.d.ts +0 -19
- package/dist/lib/grader-sensitivity.js +0 -75
- package/dist/lib/grader-validate.d.ts +0 -19
- package/dist/lib/grader-validate.js +0 -78
- package/dist/lib/measure-retrieval.d.ts +0 -14
- package/dist/lib/measure-retrieval.js +0 -71
- package/dist/lib/pr-comment.d.ts +0 -16
- package/dist/lib/pr-comment.js +0 -28
- package/dist/lib/readiness-report.d.ts +0 -13
- package/dist/lib/readiness-report.js +0 -108
- package/dist/lib/webhook-server.d.ts +0 -11
- package/dist/lib/webhook-server.js +0 -24
- package/dist/lib/weekly-digest.d.ts +0 -24
- package/dist/lib/weekly-digest.js +0 -148
- package/dist/orchestration/env-bridge.d.ts +0 -21
- package/dist/orchestration/env-bridge.js +0 -66
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
- package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +0 -339
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +0 -70
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +0 -50
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +0 -334
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -69
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -307
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +0 -65
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +0 -368
- package/dist/pipeline/compiler/task-bridge.d.ts +0 -41
- package/dist/pipeline/compiler/task-bridge.js +0 -92
- package/dist/pipeline/expand-tasks.d.ts +0 -232
- package/dist/pipeline/expand-tasks.js +0 -467
- package/dist/pipeline/generate-configs.d.ts +0 -92
- package/dist/pipeline/generate-configs.js +0 -445
- package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
- package/dist/pipeline/steps/calculate-scores-step.js +0 -89
- package/dist/pipeline/steps/compare-step.d.ts +0 -18
- package/dist/pipeline/steps/compare-step.js +0 -90
- package/dist/pipeline/steps/eval-step.d.ts +0 -53
- package/dist/pipeline/steps/eval-step.js +0 -347
- package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
- package/dist/pipeline/steps/fetch-docs-step.js +0 -84
- package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
- package/dist/pipeline/steps/generate-configs-step.js +0 -98
- package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
- package/dist/pipeline/steps/grader-consistency-step.js +0 -74
- package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
- package/dist/pipeline/steps/publish-report-step.js +0 -243
- package/dist/pipeline/steps/report-step.d.ts +0 -13
- package/dist/pipeline/steps/report-step.js +0 -56
- package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
- package/dist/pipeline/steps/update-scores-step.js +0 -42
- package/dist/scripts/agent-behavior-report.d.ts +0 -19
- package/dist/scripts/agent-behavior-report.js +0 -315
- package/dist/scripts/baseline.d.ts +0 -43
- package/dist/scripts/baseline.js +0 -267
- package/dist/scripts/calculate-scores.d.ts +0 -166
- package/dist/scripts/calculate-scores.js +0 -1296
- package/dist/scripts/compare.d.ts +0 -22
- package/dist/scripts/compare.js +0 -334
- package/dist/scripts/coverage-audit.d.ts +0 -44
- package/dist/scripts/coverage-audit.js +0 -209
- package/dist/scripts/debug-eval.d.ts +0 -19
- package/dist/scripts/debug-eval.js +0 -73
- package/dist/scripts/discovery-report.d.ts +0 -58
- package/dist/scripts/discovery-report.js +0 -250
- package/dist/scripts/fetch-docs.d.ts +0 -35
- package/dist/scripts/fetch-docs.js +0 -472
- package/dist/scripts/generate-configs.d.ts +0 -66
- package/dist/scripts/generate-configs.js +0 -459
- package/dist/scripts/grader-api.d.ts +0 -27
- package/dist/scripts/grader-api.js +0 -206
- package/dist/scripts/grader-compare.d.ts +0 -22
- package/dist/scripts/grader-compare.js +0 -368
- package/dist/scripts/grader-consistency.d.ts +0 -20
- package/dist/scripts/grader-consistency.js +0 -313
- package/dist/scripts/grader-sensitivity.d.ts +0 -22
- package/dist/scripts/grader-sensitivity.js +0 -354
- package/dist/scripts/grader-validate.d.ts +0 -19
- package/dist/scripts/grader-validate.js +0 -267
- package/dist/scripts/measure-retrieval.d.ts +0 -10
- package/dist/scripts/measure-retrieval.js +0 -145
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +0 -24
- package/dist/scripts/migrate-tasks-to-content-lake.js +0 -328
- package/dist/scripts/pipeline.d.ts +0 -76
- package/dist/scripts/pipeline.js +0 -1031
- package/dist/scripts/pr-comment.d.ts +0 -10
- package/dist/scripts/pr-comment.js +0 -510
- package/dist/scripts/readiness-report.d.ts +0 -88
- package/dist/scripts/readiness-report.js +0 -342
- package/dist/scripts/update-quality-scores.d.ts +0 -15
- package/dist/scripts/update-quality-scores.js +0 -184
- package/dist/scripts/validate-task-sources.d.ts +0 -21
- package/dist/scripts/validate-task-sources.js +0 -210
- package/dist/scripts/validate.d.ts +0 -13
- package/dist/scripts/validate.js +0 -79
- package/dist/scripts/webhook-server.d.ts +0 -26
- package/dist/scripts/webhook-server.js +0 -147
- package/dist/scripts/weekly-digest.d.ts +0 -24
- package/dist/scripts/weekly-digest.js +0 -144
- package/dist/sinks/format-slack.d.ts +0 -64
- package/dist/sinks/format-slack.js +0 -306
- package/dist/sinks/slack-sink.d.ts +0 -27
- package/dist/sinks/slack-sink.js +0 -78
- package/dist/sinks/webhook-sink.d.ts +0 -19
- package/dist/sinks/webhook-sink.js +0 -50
- package/tasks/.expanded.agentic.yaml +0 -280
- package/tasks/.expanded.yaml +0 -565
|
@@ -1,472 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Fetch-docs.ts
|
|
3
|
-
*
|
|
4
|
-
* Pulls documentation from the Sanity CMS and generates markdown context
|
|
5
|
-
* files for use in Promptfoo evaluations. Always produces canonical contexts;
|
|
6
|
-
* other outputs are opt-in:
|
|
7
|
-
*
|
|
8
|
-
* 1. Canonical contexts — one file per evaluation task, containing
|
|
9
|
-
* only the manually-annotated "gold" documents for that task (always)
|
|
10
|
-
* 2. Feature-area contexts — one file per GROQ feature area query
|
|
11
|
-
* (opt-in via --include-feature-areas)
|
|
12
|
-
* 3. Full corpus — all articles in one file
|
|
13
|
-
* (opt-in via --include-corpus)
|
|
14
|
-
*/
|
|
15
|
-
// oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
|
|
16
|
-
import "dotenv/config";
|
|
17
|
-
import { mkdirSync, writeFileSync } from "fs";
|
|
18
|
-
import { dirname, join } from "path";
import { fileURLToPath } from "url";
|
|
19
|
-
import { fetchUrlContent, } from "../pipeline/fetch-url-content.js";
|
|
20
|
-
import { resolveMappings, } from "../pipeline/resolve-mappings.js";
|
|
21
|
-
import { createPerspectiveClient, createPublishedClient, getSanityClient, } from "../sanity/client.js";
|
|
22
|
-
import { ALL_ARTICLES_QUERY, ALL_FEATURE_AREAS, ARTICLES_METADATA_BY_SLUGS_QUERY, ARTICLE_BY_ID_QUERY, ARTICLE_BY_SLUG_QUERY, ARTICLE_BY_SLUG_WITH_PERSPECTIVE_QUERY, FEATURE_AREA_QUERIES, } from "../sanity/queries.js";
|
|
23
|
-
import { toMarkdown } from "../sanity/portable-text.js";
|
|
24
|
-
import { loadSource } from "../sources.js";
|
|
25
|
-
// ---------------------------------------------------------------------------
|
|
26
|
-
// Helpers
|
|
27
|
-
// ---------------------------------------------------------------------------
|
|
28
|
-
// Package root: two directories above the folder containing this script.
// fileURLToPath() is used instead of URL#pathname because pathname stays
// percent-encoded (a space becomes "%20") and keeps a leading slash in front
// of Windows drive letters ("/C:/..."), both of which produce invalid paths.
const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..");
|
|
29
|
-
/**
 * Neutralise Nunjucks delimiters in documentation text so Promptfoo's
 * Nunjucks template engine treats them as literal characters.
 *
 * Besides variable delimiters (`{{` / `}}`, e.g. Vue `{{ post.title }}` in
 * Nuxt docs), block-tag (`{%` / `%}`) and comment (`{#` / `#}`) delimiters are
 * escaped too, since Nunjucks interprets those as well.
 *
 * Each delimiter is rewritten as a Nunjucks string-literal expression
 * (`{{ "<delimiter>" }}`) that renders back to the original characters.
 * A single-pass regex is used so replacement text isn't re-scanned.
 *
 * @param text - Raw markdown/document text.
 * @returns The text with every Nunjucks delimiter escaped.
 */
function escapeNunjucks(text) {
    return text.replace(/\{[{%#]|[}%#]\}/g, (delimiter) => `{{ "${delimiter}" }}`);
}
|
|
39
|
-
/**
 * Rough token estimate for a piece of text: one token per four characters,
 * rounded up.
 *
 * @param text - Text to measure.
 * @returns Estimated token count (0 for an empty string).
 */
function estimateTokens(text) {
    const approxCharsPerToken = 4;
    const { length } = text;
    return Math.ceil(length / approxCharsPerToken);
}
|
|
42
|
-
// ---------------------------------------------------------------------------
|
|
43
|
-
// Article fetching (by document ID or slug, optionally through a perspective)
|
|
44
|
-
// ---------------------------------------------------------------------------
|
|
45
|
-
/**
 * Look up one article by its document ID.
 *
 * If the source has a perspective configured, the lookup goes through a
 * perspective client (the natural behavior for Studio document URLs that
 * carry a ?perspective= parameter); otherwise the default Sanity client is
 * used.
 *
 * @param id - Document ID to fetch.
 * @param source - Resolved documentation source configuration.
 * @returns Whatever the client's fetch resolves to for ARTICLE_BY_ID_QUERY.
 */
async function fetchArticleById(id, source) {
    let sanity;
    if (source.perspective) {
        sanity = createPerspectiveClient(source.perspective, source);
    }
    else {
        sanity = getSanityClient();
    }
    return sanity.fetch(ARTICLE_BY_ID_QUERY, { id });
}
|
|
58
|
-
/**
 * Fetch one article by slug with the default Sanity client and render it to
 * markdown.
 *
 * @param slug - Article slug to look up.
 * @returns The formatted article, or "" (after logging a warning) when no
 *   article matches the slug.
 */
async function fetchArticleBySlug(slug) {
    const article = await getSanityClient().fetch(ARTICLE_BY_SLUG_QUERY, { slug });
    if (article) {
        return formatArticle(article);
    }
    console.warn(` [warn] No article found for slug "${slug}"`);
    return "";
}
|
|
69
|
-
/**
 * Fetch one article by slug through a perspective-enabled client and render
 * it to markdown. Falls back to the plain slug fetch when the source has no
 * perspective configured.
 *
 * @param slug - Article slug to look up.
 * @param source - Resolved documentation source configuration.
 * @returns The formatted article, or "" (after logging a warning) when the
 *   perspective has no article for the slug.
 */
async function fetchArticleBySlugWithPerspective(slug, source) {
    const { perspective } = source;
    if (!perspective) {
        return fetchArticleBySlug(slug);
    }
    const perspectiveClient = createPerspectiveClient(perspective, source);
    const article = await perspectiveClient.fetch(ARTICLE_BY_SLUG_WITH_PERSPECTIVE_QUERY, { slug });
    if (article) {
        return formatArticle(article);
    }
    console.warn(` [warn] No article found for slug "${slug}" in perspective "${source.perspective}"`);
    return "";
}
|
|
84
|
-
// ---------------------------------------------------------------------------
|
|
85
|
-
// Article formatting and canonical (gold-retrieval) contexts
|
|
86
|
-
// ---------------------------------------------------------------------------
|
|
87
|
-
/**
 * Render one article document as a markdown section: an `## <title>` heading,
 * an optional `Section:` line, an optional description paragraph, then the
 * article body converted from Portable Text.
 *
 * @param doc - Article document; reads `title`, `section.title`,
 *   `description` and `content`.
 * @returns The markdown for the article.
 */
function formatArticle(doc) {
    // Guard on the section *title*, not just the section object, so a section
    // without a title does not render as "Section: undefined".
    const sectionTitle = doc.section?.title;
    const sectionLabel = sectionTitle ? `Section: ${sectionTitle}\n` : "";
    const desc = doc.description ? `${doc.description}\n\n` : "";
    // Convert Portable Text to Markdown; missing content is treated as empty.
    const markdown = toMarkdown(doc.content ?? []);
    return `## ${doc.title}\n\n${sectionLabel}${desc}${markdown}`;
}
|
|
95
|
-
/** Gather the slug of every canonical doc referenced by any task in the mappings. */
function collectCanonicalSlugs(mappings) {
    const slugs = new Set();
    for (const areaData of Object.values(mappings.feature_areas)) {
        for (const task of areaData.tasks) {
            for (const doc of task.canonical_docs) {
                slugs.add(doc.slug);
            }
        }
    }
    return slugs;
}
/**
 * Fetch _id, _rev, slug and title for the given slugs and write them, sorted
 * by slug, to contexts/document-manifest.json for traceability.
 */
async function writeDocumentManifest(source, slugs) {
    const metadataClient = source.perspective
        ? createPerspectiveClient(source.perspective, source)
        : getSanityClient();
    const allMetadata = await metadataClient.fetch(ARTICLES_METADATA_BY_SLUGS_QUERY, { slugs: [...slugs] });
    const manifest = allMetadata
        .map((m) => ({ _id: m._id, _rev: m._rev, slug: m.slug, title: m.title }))
        .sort((a, b) => a.slug.localeCompare(b.slug));
    writeFileSync(join(ROOT, "contexts", "document-manifest.json"), JSON.stringify(manifest, null, 2));
    console.log(` 📋 Document manifest: ${manifest.length} docs → contexts/document-manifest.json\n`);
}
/** Log a summary of which canonical docs a perspective adds, modifies or removes. */
function logReleaseImpact(releaseImpact, totalSlugs) {
    const affected = releaseImpact.added.length +
        releaseImpact.modified.length +
        releaseImpact.removed.length;
    console.log(` Perspective diff: ${affected} of ${totalSlugs} canonical docs affected`);
    if (releaseImpact.added.length > 0) {
        console.log(` Added: ${releaseImpact.added.join(", ")}`);
    }
    if (releaseImpact.modified.length > 0) {
        console.log(` Modified: ${releaseImpact.modified.join(", ")}`);
    }
    if (releaseImpact.removed.length > 0) {
        console.log(` Removed: ${releaseImpact.removed.join(", ")}`);
    }
    console.log();
}
/**
 * Fetch every direct URL of the source, one after another, returning the
 * fetched content plus one metadata record per URL (successful or not).
 */
async function fetchDirectUrlContent(source) {
    const urlContent = [];
    const urlFetchMetadata = [];
    if (source.urls.length === 0) {
        return { urlContent, urlFetchMetadata };
    }
    console.log(` Fetching ${source.urls.length} direct URL(s)...`);
    for (const url of source.urls) {
        const result = await fetchUrlContent(url, source.headers);
        urlFetchMetadata.push({
            contentLength: result.content?.length,
            error: result.error,
            method: result.method,
            status: result.status,
            url: result.url,
        });
        if (result.content) {
            urlContent.push(result.content);
            const tokens = estimateTokens(result.content);
            console.log(` ✅ ${url} (via ${result.method}, ~${tokens} tokens)`);
        }
        else {
            console.warn(` ⚠️ ${url}: ${result.error}`);
        }
    }
    console.log();
    return { urlContent, urlFetchMetadata };
}
/** Write the per-URL fetch outcomes to contexts/url-fetch.json. */
function writeUrlFetchMetadata(urlFetchMetadata) {
    const fetched = urlFetchMetadata.filter((m) => m.method !== "failed");
    const failures = urlFetchMetadata.filter((m) => m.method === "failed");
    const meta = {
        failures: failures.map((f) => ({ error: f.error, url: f.url })),
        fetchedUrls: fetched,
        totalFailed: failures.length,
        totalFetched: fetched.length,
    };
    writeFileSync(join(ROOT, "contexts", "url-fetch.json"), JSON.stringify(meta, null, 2));
    console.log("\n 📄 URL fetch metadata written to contexts/url-fetch.json");
}
/**
 * Generate the canonical (gold-retrieval) context files: one markdown file
 * per task under contexts/canonical/, containing that task's canonical docs.
 *
 * Per canonical doc, content is chosen in this order:
 *   1. skipped if the active perspective removed it;
 *   2. taken from the document overlay if a requested document ID resolved
 *      to the same slug;
 *   3. fetched through the perspective if the perspective added/modified it;
 *   4. otherwise fetched with the default client.
 * Overlay documents that matched no canonical slug and all direct-URL content
 * are appended to every task's context.
 *
 * Also writes contexts/document-manifest.json and, when applicable,
 * release-impact.json, document-overlay.json and url-fetch.json.
 *
 * @param source - Resolved documentation source configuration.
 */
async function generateCanonicalContexts(source) {
    const mappings = resolveMappings(ROOT);
    const canonicalDir = join(ROOT, "contexts", "canonical");
    mkdirSync(canonicalDir, { recursive: true });
    const hasPerspective = Boolean(source.perspective);
    const hasDocumentIds = source.documentIds !== undefined && source.documentIds.length > 0;
    const hasDirectUrls = source.urls.length > 0;
    // Build descriptive header
    const parts = [];
    if (hasPerspective) {
        parts.push(`perspective "${source.perspective}"`);
    }
    if (hasDocumentIds) {
        parts.push(`${source.documentIds.length} document overlay(s)`);
    }
    if (hasDirectUrls) {
        parts.push(`${source.urls.length} direct URL(s)`);
    }
    if (parts.length > 0) {
        console.log(`\nGenerating canonical contexts with ${parts.join(" + ")}...\n`);
    }
    else {
        console.log("\nGenerating canonical (gold-retrieval) contexts...\n");
    }
    // Collect all canonical slugs across all tasks for a single diff pass
    const allSlugs = collectCanonicalSlugs(mappings);
    // Capture _id, _rev, slug, title at evaluation time for traceability.
    await writeDocumentManifest(source, allSlugs);
    // If a perspective is active, diff all canonical slugs at once
    let releaseImpact;
    if (hasPerspective) {
        releaseImpact = await identifyAffectedDocs(source, [...allSlugs]);
        logReleaseImpact(releaseImpact, allSlugs.size);
    }
    // If document IDs are specified, resolve them against the canonical set
    let documentOverlay;
    if (hasDocumentIds) {
        console.log(` Resolving ${source.documentIds.length} document ID(s) against canonical set...`);
        documentOverlay = await resolveDocumentOverlay(source.documentIds, allSlugs, source);
        console.log(` Document overlay: ${documentOverlay.replacements.size} replacement(s), ${documentOverlay.appendedContent.length} appended`);
        console.log();
    }
    // If direct URLs are specified, fetch their content
    const { urlContent, urlFetchMetadata } = await fetchDirectUrlContent(source);
    const affectedSlugs = new Set([
        ...(releaseImpact?.added ?? []),
        ...(releaseImpact?.modified ?? []),
    ]);
    const removedSlugs = new Set(releaseImpact?.removed ?? []);
    for (const [area, areaData] of Object.entries(mappings.feature_areas)) {
        console.log(` Feature area: ${area}`);
        for (const task of areaData.tasks) {
            console.log(` Task: ${task.id}`);
            const docContents = [];
            for (const doc of task.canonical_docs) {
                if (removedSlugs.has(doc.slug)) {
                    console.log(` ⚠️ Skipping: ${doc.slug} (removed in perspective)`);
                    continue;
                }
                // Check if this slug has a document overlay replacement
                if (documentOverlay?.replacements.has(doc.slug)) {
                    console.log(` Fetching: ${doc.slug} (from document overlay)`);
                    docContents.push(documentOverlay.replacements.get(doc.slug));
                    continue;
                }
                let content;
                if (affectedSlugs.has(doc.slug)) {
                    console.log(` Fetching: ${doc.slug} (from perspective)`);
                    content = await fetchArticleBySlugWithPerspective(doc.slug, source);
                }
                else {
                    console.log(` Fetching: ${doc.slug}`);
                    content = await fetchArticleBySlug(doc.slug);
                }
                // The fetch helpers return "" for a missing article; leave those out.
                if (content) {
                    docContents.push(content);
                }
            }
            // Append any extra documents from the overlay that didn't match
            // canonical slugs — these are added to every task's context
            if (documentOverlay && documentOverlay.appendedContent.length > 0) {
                docContents.push(...documentOverlay.appendedContent);
                console.log(` + ${documentOverlay.appendedContent.length} appended document(s) from overlay`);
            }
            // Append URL-fetched content (from --url classified as direct-url)
            if (urlContent.length > 0) {
                docContents.push(...urlContent);
                console.log(` + ${urlContent.length} URL-fetched document(s)`);
            }
            const combined = docContents.join("\n\n---\n\n");
            const outPath = join(canonicalDir, `${task.id}.md`);
            writeFileSync(outPath, escapeNunjucks(combined));
            console.log(` -> ${task.id}.md: ~${estimateTokens(combined)} tokens`);
        }
    }
    // Write release impact file for downstream consumption (scoring, reports)
    if (releaseImpact) {
        const impactPath = join(ROOT, "contexts", "release-impact.json");
        writeFileSync(impactPath, JSON.stringify(releaseImpact, null, 2));
        console.log(`\n 📄 Release impact written to contexts/release-impact.json`);
    }
    // Write document overlay metadata for downstream consumption
    if (documentOverlay) {
        const overlayMeta = {
            appendedCount: documentOverlay.appendedContent.length,
            documentIds: source.documentIds,
            replacedSlugs: [...documentOverlay.replacements.keys()],
        };
        const overlayPath = join(ROOT, "contexts", "document-overlay.json");
        writeFileSync(overlayPath, JSON.stringify(overlayMeta, null, 2));
        console.log("\n 📄 Document overlay written to contexts/document-overlay.json");
    }
    // Write URL fetch metadata for downstream consumption
    if (urlFetchMetadata.length > 0) {
        writeUrlFetchMetadata(urlFetchMetadata);
    }
}
|
|
282
|
-
// ---------------------------------------------------------------------------
|
|
283
|
-
// Feature-area contexts
|
|
284
|
-
// ---------------------------------------------------------------------------
|
|
285
|
-
/**
 * Write one markdown context file per feature area into contexts/.
 *
 * Each feature area's query is run in turn; areas that return no articles
 * are skipped with a warning, the rest are formatted, joined with `---`
 * separators, Nunjucks-escaped and written to `<feature>.md`.
 *
 * @param source - Resolved documentation source configuration.
 */
async function generateFeatureAreaContexts(source) {
    const sanity = getSanityClient(undefined, source);
    const outputDir = join(ROOT, "contexts");
    mkdirSync(outputDir, { recursive: true });
    console.log("Generating feature-area contexts...\n");
    for (const feature of ALL_FEATURE_AREAS) {
        const articles = await sanity.fetch(FEATURE_AREA_QUERIES[feature]);
        if (articles.length !== 0) {
            const combined = articles.map(formatArticle).join("\n\n---\n\n");
            writeFileSync(join(outputDir, `${feature}.md`), escapeNunjucks(combined));
            console.log(` ${feature}: ${articles.length} articles, ~${estimateTokens(combined)} tokens`);
        }
        else {
            console.warn(` [warn] No articles found for "${feature}"`);
        }
    }
}
|
|
303
|
-
// ---------------------------------------------------------------------------
|
|
304
|
-
// Full corpus (optional — useful for retrieval experiments) and perspective diffing
|
|
305
|
-
// ---------------------------------------------------------------------------
|
|
306
|
-
/**
 * Write every article into a single contexts/full-corpus.md file.
 *
 * Each article is rendered as a heading, a `Section:` line (falling back to
 * "General"), a `URL:` line built from the source's base URL and the slug,
 * and the body converted from Portable Text. Articles are separated by `---`.
 *
 * @param source - Resolved documentation source configuration.
 */
async function generateFullCorpus(source) {
    const sanity = getSanityClient(undefined, source);
    console.log("\nGenerating full corpus...");
    const articles = await sanity.fetch(ALL_ARTICLES_QUERY);
    const renderEntry = (article) => {
        const body = toMarkdown(article.content ?? []);
        // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string title should fall back to "General"
        const sectionTitle = article.section?.title || "General";
        return `## ${article.title}\n\nSection: ${sectionTitle}\nURL: ${source.baseUrl}/${article.slug}\n\n${body}`;
    };
    const corpus = articles.map((article) => renderEntry(article)).join("\n\n---\n\n");
    const contextsDir = join(ROOT, "contexts");
    mkdirSync(contextsDir, { recursive: true });
    writeFileSync(join(contextsDir, "full-corpus.md"), escapeNunjucks(corpus));
    console.log(` full-corpus.md: ${articles.length} articles, ~${estimateTokens(corpus)} tokens`);
}
|
|
326
|
-
/**
 * Diff the canonical docs between the published dataset and a perspective.
 *
 * Metadata for the given slugs is fetched from both clients in parallel and
 * each slug is classified as:
 *   - added:     present only in the perspective
 *   - removed:   present only in the published dataset
 *   - modified:  present in both with differing `_rev`
 *   - unchanged: everything else (same `_rev`, or found in neither)
 *
 * Without a perspective on the source, every slug is reported as unchanged
 * and no queries are made.
 *
 * @param source - Resolved documentation source configuration.
 * @param canonicalSlugs - Array of canonical doc slugs to compare.
 * @returns `{ added, modified, removed, unchanged }`, each an array of slugs.
 */
async function identifyAffectedDocs(source, canonicalSlugs) {
    const impact = { added: [], modified: [], removed: [], unchanged: [] };
    if (!source.perspective) {
        impact.unchanged = [...canonicalSlugs];
        return impact;
    }
    const publishedClient = createPublishedClient(source);
    const perspectiveClient = createPerspectiveClient(source.perspective, source);
    const [publishedRows, perspectiveRows] = await Promise.all([
        publishedClient.fetch(ARTICLES_METADATA_BY_SLUGS_QUERY, { slugs: canonicalSlugs }),
        perspectiveClient.fetch(ARTICLES_METADATA_BY_SLUGS_QUERY, { slugs: canonicalSlugs }),
    ]);
    const indexBySlug = (rows) => new Map(rows.map((row) => [row.slug, row]));
    const publishedBySlug = indexBySlug(publishedRows);
    const perspectiveBySlug = indexBySlug(perspectiveRows);
    // Decide which bucket a slug belongs to from its two metadata records.
    const classify = (published, inPerspective) => {
        if (!published) {
            return inPerspective ? "added" : "unchanged";
        }
        if (!inPerspective) {
            return "removed";
        }
        return published._rev !== inPerspective._rev ? "modified" : "unchanged";
    };
    for (const slug of canonicalSlugs) {
        const bucket = classify(publishedBySlug.get(slug), perspectiveBySlug.get(slug));
        impact[bucket].push(slug);
    }
    return impact;
}
|
|
374
|
-
// ---------------------------------------------------------------------------
|
|
375
|
-
// Main
|
|
376
|
-
// ---------------------------------------------------------------------------
|
|
377
|
-
/**
 * CLI entry point: parse flags, load the documentation source, print a
 * summary of it, then generate the requested context files.
 *
 * Flags:
 *   --include-feature-areas  also generate feature-area contexts
 *   --include-corpus         also generate the full corpus
 *   --source <name>          name of the source to load (default source
 *                            when the flag is absent)
 *
 * Canonical contexts are always generated.
 *
 * @throws Error when `--source` is given without a source name.
 */
async function main() {
    console.log("=== ai-literacy-framework — Documentation Fetcher ===\n");
    const args = process.argv.slice(2);
    const includeFeatureAreas = args.includes("--include-feature-areas");
    const includeCorpus = args.includes("--include-corpus");
    // Parse --source <name> argument
    const sourceIdx = args.indexOf("--source");
    let sourceName;
    if (sourceIdx !== -1) {
        sourceName = args[sourceIdx + 1];
        // Fail fast instead of silently falling back to the default source (or
        // treating the next flag as a source name) when the value is missing.
        if (sourceName === undefined || sourceName.startsWith("--")) {
            throw new Error("--source requires a source name (usage: --source <name>)");
        }
    }
    const source = loadSource(sourceName);
    console.log(` Source: ${sourceName ?? "default (production)"}`);
    console.log(` Base URL: ${source.baseUrl}`);
    if (source.dataset) {
        console.log(` Dataset: ${source.dataset}`);
    }
    if (source.perspective) {
        console.log(` Perspective: ${source.perspective}`);
    }
    if (source.documentIds && source.documentIds.length > 0) {
        console.log(` Documents: ${source.documentIds.length} document ID(s)`);
        for (const id of source.documentIds) {
            console.log(` ${id}`);
        }
    }
    if (source.urls.length > 0) {
        console.log(` URLs: ${source.urls.length} direct URL(s)`);
        for (const u of source.urls) {
            console.log(` ${u}`);
        }
    }
    console.log();
    if (includeFeatureAreas) {
        await generateFeatureAreaContexts(source);
    }
    await generateCanonicalContexts(source);
    if (includeCorpus) {
        await generateFullCorpus(source);
    }
    console.log("\nDone!");
}
|
|
416
|
-
// ---------------------------------------------------------------------------
|
|
417
|
-
// Document ID overlay and script entry point
|
|
418
|
-
// ---------------------------------------------------------------------------
|
|
419
|
-
/**
 * Turn a list of document IDs into an overlay describing how those documents
 * interact with the canonical doc set.
 *
 * All documents are fetched in parallel (through the perspective if the
 * source has one). Each document that is found is formatted and then either:
 *   - stored in `replacements` under its slug, when the slug is one of the
 *     canonical slugs (it replaces the canonical fetch), or
 *   - pushed onto `appendedContent`, when it matches no canonical slug
 *     (extra context).
 * IDs that resolve to no document are skipped with a warning.
 *
 * @param documentIds - Array of Sanity document IDs to resolve
 * @param canonicalSlugs - Set of all canonical doc slugs across all tasks
 * @param source - The resolved documentation source configuration
 * @returns `{ appendedContent: string[], replacements: Map<slug, content> }`
 */
async function resolveDocumentOverlay(documentIds, canonicalSlugs, source) {
    const appendedContent = [];
    const replacements = new Map();
    const overlay = { appendedContent, replacements };
    if (documentIds.length === 0) {
        return overlay;
    }
    // Fetch all documents in parallel; results stay aligned with documentIds.
    const docs = await Promise.all(documentIds.map((id) => fetchArticleById(id, source)));
    for (const [index, doc] of docs.entries()) {
        const id = documentIds[index];
        if (!doc) {
            console.warn(` [warn] No article found for document ID "${id}"`);
            continue;
        }
        const content = formatArticle(doc);
        if (!content) {
            continue;
        }
        const matchesCanonical = Boolean(doc.slug) && canonicalSlugs.has(doc.slug);
        if (matchesCanonical) {
            // Same slug as a canonical doc — use this content in its place
            replacements.set(doc.slug, content);
            console.log(` 📄 Document ${id} → replaces canonical doc "${doc.slug}"`);
            continue;
        }
        // No canonical slug matches — keep it as additional context
        appendedContent.push(content);
        const slugInfo = doc.slug ? ` (slug: "${doc.slug}")` : "";
        console.log(` 📄 Document ${id} → appended as additional context${slugInfo}`);
    }
    return overlay;
}
|
|
469
|
-
// Script entry point: run main() and, if it rejects, report the failure on
// stderr and terminate the process with exit code 1.
main().catch((fatal) => {
    console.error("Fatal error:", fatal);
    process.exit(1);
});
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* generate-configs.ts
|
|
3
|
-
*
|
|
4
|
-
* Reads config/models.yaml (the central model registry) and generates all
|
|
5
|
-
* promptfoo config files with the correct provider entries.
|
|
6
|
-
*
|
|
7
|
-
* This keeps model definitions in one place — add a model to config/models.yaml
|
|
8
|
-
* and run `pnpm generate-configs` to propagate it to all eval modes.
|
|
9
|
-
*
|
|
10
|
-
* Generated configs:
|
|
11
|
-
* - promptfooconfig.yaml (baseline: with-docs vs without-docs)
|
|
12
|
-
* - promptfooconfig.observed.yaml (instrumented HTTP recording)
|
|
13
|
-
* - promptfooconfig.agentic.yaml (agentic tool-calling: naive vs optimized)
|
|
14
|
-
*
|
|
15
|
-
* Usage:
|
|
16
|
-
* pnpm generate-configs
|
|
17
|
-
* # or
|
|
18
|
-
* tsx src/scripts/generate-configs.ts
|
|
19
|
-
*/
|
|
20
|
-
import type { ModelEntry } from "../pipeline/types.js";
|
|
21
|
-
/**
 * Auto-discovers every task YAML file in the tasks/ directory.
 *
 * @param rootDir - Directory the search is based on (presumably the project
 *   root that contains tasks/ — confirm against the implementation).
 * @returns One string per discovered task file (presumably file paths —
 *   confirm against the implementation).
 */
export declare function discoverTaskFiles(rootDir: string): string[];
|
|
23
|
-
/**
 * Shape shared by every entry of LoadedPrompts.
 * NOTE(review): `raw` is presumably the unrendered prompt template text and
 * `label` a display name — confirm against config/prompts.yaml.
 */
interface LoadedPromptEntry {
    id: string;
    label: string;
    raw: string;
}
/**
 * The set of prompt templates returned by loadPrompts(): one entry each for
 * the agentic, with-docs and without-docs prompts.
 */
interface LoadedPrompts {
    agentic: LoadedPromptEntry;
    withDocs: LoadedPromptEntry;
    withoutDocs: LoadedPromptEntry;
}
|
|
40
|
-
/**
 * Returns the raw API model name embedded in a promptfoo provider ID.
 *
 * A promptfoo ID encodes provider, optional sub-protocol, and model:
 *   "openai:chat:gpt-5.2"                 → "gpt-5.2"
 *   "anthropic:messages:claude-opus-4-6"  → "claude-opus-4-6"
 *   "openai:gpt-4o"                       → "gpt-4o"
 *   "google:gemini-2.5-pro"               → "gemini-2.5-pro"
 *
 * For providers it does not recognise, everything up to and including the
 * first colon is stripped
 * (e.g., "openrouter:deepseek/deepseek-r1" → "deepseek/deepseek-r1").
 *
 * @param id - The promptfoo provider ID.
 * @returns The model name portion of `id`.
 */
export declare function extractModelName(id: string): string;
|
|
53
|
-
/**
 * Returns the LLM provider family encoded in a promptfoo provider ID.
 *
 *   "openai:chat:gpt-5.2"                 → "openai"
 *   "anthropic:messages:claude-opus-4-6"  → "anthropic"
 *   "google:gemini-2.5-pro"               → "google"
 *
 * @param id - The promptfoo provider ID.
 * @returns The provider family named by `id`.
 */
export declare function extractProvider(id: string): string;
|
|
61
|
-
/**
 * Loads the prompt templates from config/prompts.yaml.
 *
 * @param rootDir - Directory the config path is resolved against (presumably
 *   the project root — confirm against the implementation).
 * @returns The loaded agentic, with-docs and without-docs prompt templates.
 * @throws If the prompts file is missing or malformed.
 */
export declare function loadPrompts(rootDir: string): LoadedPrompts;
|
|
63
|
-
/**
 * Merges the default config with an optional model-specific config and an
 * optional set of overrides into a single config object.
 *
 * NOTE(review): precedence is not visible from this declaration; presumably
 * `overrides` wins over `modelConfig`, which wins over `defaults` — confirm
 * against the implementation.
 *
 * @param defaults - Baseline config values.
 * @param modelConfig - Optional model-specific config values.
 * @param overrides - Optional additional config values.
 * @returns The merged config.
 */
export declare function mergeConfig(defaults: Record<string, unknown>, modelConfig?: Record<string, unknown>, overrides?: Record<string, unknown>): Record<string, unknown>;
|
|
65
|
-
/**
 * Reports whether a model registry entry applies to the given eval mode.
 *
 * NOTE(review): the matching rule lives in the implementation and is not
 * visible from this declaration — confirm before relying on specifics.
 *
 * @param model - The model entry to test.
 * @param mode - The eval mode name to test against.
 * @returns `true` when `model` matches `mode`, otherwise `false`.
 */
export declare function modelMatchesMode(model: ModelEntry, mode: string): boolean;
|
|
66
|
-
export {};
|