@sanity/ailf 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -12
  2. package/dist/_vendor/ailf-core/examples/index.js +19 -12
  3. package/dist/_vendor/ailf-core/ports/context.d.ts +4 -0
  4. package/dist/adapters/task-sources/repo-schemas.d.ts +12 -2
  5. package/dist/adapters/task-sources/repo-schemas.js +28 -2
  6. package/dist/cli.js +0 -0
  7. package/dist/commands/init.js +17 -5
  8. package/dist/commands/pipeline-action.js +44 -6
  9. package/dist/commands/publish.js +2 -1
  10. package/dist/commands/validate-tasks.js +4 -1
  11. package/dist/composition-root.js +9 -5
  12. package/dist/orchestration/build-app-context.js +2 -0
  13. package/package.json +1 -1
  14. package/dist/commands/update-quality-scores.d.ts +0 -5
  15. package/dist/commands/update-quality-scores.js +0 -20
  16. package/dist/lib/agent-behavior-report.d.ts +0 -8
  17. package/dist/lib/agent-behavior-report.js +0 -185
  18. package/dist/lib/baseline.d.ts +0 -19
  19. package/dist/lib/baseline.js +0 -153
  20. package/dist/lib/calculate-scores.d.ts +0 -23
  21. package/dist/lib/calculate-scores.js +0 -42
  22. package/dist/lib/compare.d.ts +0 -18
  23. package/dist/lib/compare.js +0 -170
  24. package/dist/lib/coverage-audit.d.ts +0 -4
  25. package/dist/lib/coverage-audit.js +0 -42
  26. package/dist/lib/discovery-report.d.ts +0 -13
  27. package/dist/lib/discovery-report.js +0 -57
  28. package/dist/lib/fetch-docs.d.ts +0 -30
  29. package/dist/lib/fetch-docs.js +0 -171
  30. package/dist/lib/generate-configs.d.ts +0 -25
  31. package/dist/lib/generate-configs.js +0 -42
  32. package/dist/lib/grader-api.d.ts +0 -21
  33. package/dist/lib/grader-api.js +0 -34
  34. package/dist/lib/grader-compare.d.ts +0 -19
  35. package/dist/lib/grader-compare.js +0 -91
  36. package/dist/lib/grader-consistency.d.ts +0 -27
  37. package/dist/lib/grader-consistency.js +0 -79
  38. package/dist/lib/grader-sensitivity.d.ts +0 -19
  39. package/dist/lib/grader-sensitivity.js +0 -75
  40. package/dist/lib/grader-validate.d.ts +0 -19
  41. package/dist/lib/grader-validate.js +0 -78
  42. package/dist/lib/measure-retrieval.d.ts +0 -14
  43. package/dist/lib/measure-retrieval.js +0 -71
  44. package/dist/lib/pr-comment.d.ts +0 -16
  45. package/dist/lib/pr-comment.js +0 -28
  46. package/dist/lib/readiness-report.d.ts +0 -13
  47. package/dist/lib/readiness-report.js +0 -108
  48. package/dist/lib/webhook-server.d.ts +0 -11
  49. package/dist/lib/webhook-server.js +0 -24
  50. package/dist/lib/weekly-digest.d.ts +0 -24
  51. package/dist/lib/weekly-digest.js +0 -148
  52. package/dist/orchestration/env-bridge.d.ts +0 -21
  53. package/dist/orchestration/env-bridge.js +0 -66
  54. package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
  55. package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
  56. package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
  57. package/dist/pipeline/steps/calculate-scores-step.js +0 -89
  58. package/dist/pipeline/steps/compare-step.d.ts +0 -18
  59. package/dist/pipeline/steps/compare-step.js +0 -90
  60. package/dist/pipeline/steps/eval-step.d.ts +0 -53
  61. package/dist/pipeline/steps/eval-step.js +0 -347
  62. package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
  63. package/dist/pipeline/steps/fetch-docs-step.js +0 -84
  64. package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
  65. package/dist/pipeline/steps/generate-configs-step.js +0 -98
  66. package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
  67. package/dist/pipeline/steps/grader-consistency-step.js +0 -74
  68. package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
  69. package/dist/pipeline/steps/publish-report-step.js +0 -243
  70. package/dist/pipeline/steps/report-step.d.ts +0 -13
  71. package/dist/pipeline/steps/report-step.js +0 -56
  72. package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
  73. package/dist/pipeline/steps/update-scores-step.js +0 -42
  74. package/dist/scripts/agent-behavior-report.d.ts +0 -19
  75. package/dist/scripts/agent-behavior-report.js +0 -315
  76. package/dist/scripts/baseline.d.ts +0 -43
  77. package/dist/scripts/baseline.js +0 -267
  78. package/dist/scripts/calculate-scores.d.ts +0 -166
  79. package/dist/scripts/calculate-scores.js +0 -1296
  80. package/dist/scripts/compare.d.ts +0 -22
  81. package/dist/scripts/compare.js +0 -334
  82. package/dist/scripts/coverage-audit.d.ts +0 -44
  83. package/dist/scripts/coverage-audit.js +0 -209
  84. package/dist/scripts/debug-eval.d.ts +0 -19
  85. package/dist/scripts/debug-eval.js +0 -73
  86. package/dist/scripts/discovery-report.d.ts +0 -58
  87. package/dist/scripts/discovery-report.js +0 -250
  88. package/dist/scripts/fetch-docs.d.ts +0 -35
  89. package/dist/scripts/fetch-docs.js +0 -472
  90. package/dist/scripts/generate-configs.d.ts +0 -66
  91. package/dist/scripts/generate-configs.js +0 -459
  92. package/dist/scripts/grader-api.d.ts +0 -27
  93. package/dist/scripts/grader-api.js +0 -206
  94. package/dist/scripts/grader-compare.d.ts +0 -22
  95. package/dist/scripts/grader-compare.js +0 -368
  96. package/dist/scripts/grader-consistency.d.ts +0 -20
  97. package/dist/scripts/grader-consistency.js +0 -313
  98. package/dist/scripts/grader-sensitivity.d.ts +0 -22
  99. package/dist/scripts/grader-sensitivity.js +0 -354
  100. package/dist/scripts/grader-validate.d.ts +0 -19
  101. package/dist/scripts/grader-validate.js +0 -267
  102. package/dist/scripts/measure-retrieval.d.ts +0 -10
  103. package/dist/scripts/measure-retrieval.js +0 -145
  104. package/dist/scripts/pipeline.d.ts +0 -76
  105. package/dist/scripts/pipeline.js +0 -1031
  106. package/dist/scripts/pr-comment.d.ts +0 -10
  107. package/dist/scripts/pr-comment.js +0 -510
  108. package/dist/scripts/readiness-report.d.ts +0 -88
  109. package/dist/scripts/readiness-report.js +0 -342
  110. package/dist/scripts/update-quality-scores.d.ts +0 -15
  111. package/dist/scripts/update-quality-scores.js +0 -184
  112. package/dist/scripts/validate.d.ts +0 -13
  113. package/dist/scripts/validate.js +0 -79
  114. package/dist/scripts/webhook-server.d.ts +0 -26
  115. package/dist/scripts/webhook-server.js +0 -147
  116. package/dist/scripts/weekly-digest.d.ts +0 -24
  117. package/dist/scripts/weekly-digest.js +0 -144
  118. package/dist/sinks/format-slack.d.ts +0 -64
  119. package/dist/sinks/format-slack.js +0 -306
  120. package/dist/sinks/slack-sink.d.ts +0 -27
  121. package/dist/sinks/slack-sink.js +0 -78
  122. package/dist/sinks/webhook-sink.d.ts +0 -19
  123. package/dist/sinks/webhook-sink.js +0 -50
  124. package/tasks/.expanded.agentic.yaml +0 -51
  125. package/tasks/.expanded.yaml +0 -66
@@ -1,73 +0,0 @@
1
- /**
2
- * debug-eval.ts
3
- *
4
- * Thin wrapper around `promptfoo eval` that reads DEBUG_EVAL_* environment
5
- * variables and forwards them as native promptfoo filter flags.
6
- *
7
- * Environment variables:
8
- * DEBUG_EVAL=1 — required to enable debug mode
9
- * DEBUG_EVAL_PATTERN=<re> — --filter-pattern (regex on test description)
10
- * DEBUG_EVAL_N=<number> — --filter-first-n (default: 2 when no other filters set)
11
- * DEBUG_EVAL_SAMPLE=<number> — --filter-sample (random N tests)
12
- *
13
- * Usage:
14
- * tsx src/scripts/debug-eval.ts --config promptfooconfig.yaml
15
- * tsx src/scripts/debug-eval.ts --config promptfooconfig.agentic.yaml --no-cache
16
- *
17
- * All extra argv are forwarded to promptfoo eval unchanged.
18
- */
19
- import { execSync } from "child_process";
20
- // ---------------------------------------------------------------------------
21
- // Parse DEBUG_EVAL_* environment
22
- // ---------------------------------------------------------------------------
23
- const debugEnabled = process.env.DEBUG_EVAL === "1";
24
- if (!debugEnabled) {
25
- console.error("⚠ debug-eval.ts called without DEBUG_EVAL=1 — running full eval instead.");
26
- }
27
- const pattern = process.env.DEBUG_EVAL_PATTERN;
28
- const firstN = process.env.DEBUG_EVAL_N;
29
- const sample = process.env.DEBUG_EVAL_SAMPLE;
30
- // ---------------------------------------------------------------------------
31
- // Build filter flags
32
- // ---------------------------------------------------------------------------
33
- const filterFlags = [];
34
- if (debugEnabled) {
35
- if (pattern) {
36
- filterFlags.push(`--filter-pattern`, `'${pattern}'`);
37
- }
38
- if (sample) {
39
- filterFlags.push(`--filter-sample`, sample);
40
- }
41
- if (firstN) {
42
- filterFlags.push(`--filter-first-n`, firstN);
43
- }
44
- // Default: if no filters specified, limit to first 2 tests for speed
45
- if (filterFlags.length === 0) {
46
- filterFlags.push(`--filter-first-n`, "2");
47
- }
48
- }
49
- // ---------------------------------------------------------------------------
50
- // Forward to promptfoo eval
51
- // ---------------------------------------------------------------------------
52
- // argv[0] = node, argv[1] = this script, argv[2..] = user args
53
- const passthroughArgs = process.argv.slice(2);
54
- const allArgs = ["eval", ...passthroughArgs, ...filterFlags];
55
- const cmd = `promptfoo ${allArgs.join(" ")}`;
56
- if (debugEnabled) {
57
- console.log(`\n🐛 Debug mode enabled`);
58
- console.log(` Filters: ${filterFlags.join(" ") || "(default: first 2)"}`);
59
- console.log(` Command: ${cmd}\n`);
60
- }
61
- try {
62
- execSync(cmd, {
63
- env: process.env,
64
- stdio: "inherit",
65
- });
66
- }
67
- catch (err) {
68
- // promptfoo exits non-zero when assertions fail — that's expected
69
- const code = err !== null && typeof err === "object" && "status" in err
70
- ? err.status
71
- : 1;
72
- process.exit(code);
73
- }
@@ -1,58 +0,0 @@
1
- /**
2
- * discovery-report.ts
3
- *
4
- * Generates an agent discoverability report from agentic mode retrieval
5
- * metrics. Reads score-summary.json (which contains `retrievalMetrics`
6
- * from agentic evaluation) and produces a markdown report showing:
7
- *
8
- * - Retrieval summary (recall, precision, F1)
9
- * - Per-area retrieval breakdown
10
- * - Invisible documents (never retrieved by any task)
11
- * - Recommendations for improving discoverability
12
- *
13
- * Phase 5c of the Scenario Matrix implementation (Scenarios 4.1 and 4.2).
14
- *
15
- * Usage:
16
- * tsx src/scripts/discovery-report.ts # stdout
17
- * tsx src/scripts/discovery-report.ts --area groq # filter by area
18
- * tsx src/scripts/discovery-report.ts --output report.md
19
- *
20
- * @see docs/design-docs/retrieval-metrics.md
21
- */
22
- import "dotenv/config";
23
- import type { AreaRetrievalMetrics, RetrievalMetrics, ScoreSummary } from "../pipeline/types.js";
24
- export interface DiscoveryReport {
25
- /** All areas included in the report (after filtering) */
26
- areas: AreaRetrievalMetrics[];
27
- /** Base URL from the score summary source config */
28
- baseUrl: string | undefined;
29
- /** Document slugs that were never retrieved by any task */
30
- invisibleDocs: InvisibleDoc[];
31
- /** Overall retrieval metrics */
32
- overall: RetrievalMetrics["overall"];
33
- /** Actionable recommendations */
34
- recommendations: string[];
35
- /** ISO timestamp of the source evaluation */
36
- timestamp: string;
37
- /** Total canonical docs across included areas */
38
- totalCanonicalDocs: number;
39
- /** Total hits (canonical docs successfully retrieved) */
40
- totalHits: number;
41
- }
42
- export interface InvisibleDoc {
43
- /** Tasks that reference this document via canonical_docs */
44
- referencedBy: string[];
45
- /** The document slug */
46
- slug: string;
47
- }
48
- /**
49
- * Format a discovery report as markdown.
50
- */
51
- export declare function formatDiscoveryMarkdown(report: DiscoveryReport): string;
52
- /**
53
- * Generate a structured discovery report from a score summary.
54
- *
55
- * @param summary - Parsed score-summary.json
56
- * @param areaFilter - Optional area names to include (all if empty)
57
- */
58
- export declare function generateDiscoveryReport(summary: ScoreSummary, areaFilter?: string[]): DiscoveryReport;
@@ -1,250 +0,0 @@
1
- /**
2
- * discovery-report.ts
3
- *
4
- * Generates an agent discoverability report from agentic mode retrieval
5
- * metrics. Reads score-summary.json (which contains `retrievalMetrics`
6
- * from agentic evaluation) and produces a markdown report showing:
7
- *
8
- * - Retrieval summary (recall, precision, F1)
9
- * - Per-area retrieval breakdown
10
- * - Invisible documents (never retrieved by any task)
11
- * - Recommendations for improving discoverability
12
- *
13
- * Phase 5c of the Scenario Matrix implementation (Scenarios 4.1 and 4.2).
14
- *
15
- * Usage:
16
- * tsx src/scripts/discovery-report.ts # stdout
17
- * tsx src/scripts/discovery-report.ts --area groq # filter by area
18
- * tsx src/scripts/discovery-report.ts --output report.md
19
- *
20
- * @see docs/design-docs/retrieval-metrics.md
21
- */
22
- // oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
23
- import "dotenv/config";
24
- import { existsSync, readFileSync, writeFileSync } from "node:fs";
25
- import { dirname, join, resolve } from "node:path";
26
- import { fileURLToPath } from "node:url";
27
- const __dirname = dirname(fileURLToPath(import.meta.url));
28
- const ROOT = resolve(__dirname, "..", "..");
29
- // ---------------------------------------------------------------------------
30
- // Core logic (exported for testing)
31
- // ---------------------------------------------------------------------------
32
- /**
33
- * Format a discovery report as markdown.
34
- */
35
- export function formatDiscoveryMarkdown(report) {
36
- const lines = [];
37
- // Header
38
- lines.push("## 🔍 Agent Discoverability Report");
39
- lines.push("");
40
- if (report.baseUrl) {
41
- lines.push(`**Base URL:** ${report.baseUrl}`);
42
- }
43
- lines.push("**Mode:** Agentic");
44
- lines.push("");
45
- // Retrieval summary table
46
- lines.push("### Retrieval Summary");
47
- lines.push("");
48
- lines.push("| Metric | Value |");
49
- lines.push("|---|---|");
50
- lines.push(`| Recall (canonical docs found) | ${pct(report.overall.avgRecall)} (${report.totalHits}/${report.totalCanonicalDocs}) |`);
51
- lines.push(`| Precision (relevant docs fetched) | ${pct(report.overall.avgPrecision)} |`);
52
- lines.push(`| F1 Score | ${report.overall.avgF1.toFixed(2)} |`);
53
- lines.push(`| Invisible docs | ${report.invisibleDocs.length} |`);
54
- lines.push("");
55
- // Per-area breakdown
56
- if (report.areas.length > 0) {
57
- lines.push("### Per-Area Breakdown");
58
- lines.push("");
59
- lines.push("| Area | Recall | Precision | F1 | Tasks |");
60
- lines.push("|---|---|---|---|---|");
61
- for (const area of sortedAreas(report.areas)) {
62
- lines.push(`| ${area.area} | ${pct(area.avgRecall)} | ${pct(area.avgPrecision)} | ${area.avgF1.toFixed(2)} | ${area.taskCount} |`);
63
- }
64
- lines.push("");
65
- }
66
- // Invisible documents
67
- if (report.invisibleDocs.length > 0) {
68
- lines.push("### Invisible Documents (never retrieved by any task)");
69
- lines.push("");
70
- for (const doc of report.invisibleDocs) {
71
- const refs = doc.referencedBy.join(", ");
72
- lines.push(`- \`${doc.slug}\` — referenced by ${refs}`);
73
- }
74
- lines.push("");
75
- }
76
- // Recommendations
77
- if (report.recommendations.length > 0) {
78
- lines.push("### Recommendations");
79
- lines.push("");
80
- for (let i = 0; i < report.recommendations.length; i++) {
81
- lines.push(`${i + 1}. ${report.recommendations[i]}`);
82
- }
83
- lines.push("");
84
- }
85
- return lines.join("\n");
86
- }
87
- /**
88
- * Generate a structured discovery report from a score summary.
89
- *
90
- * @param summary - Parsed score-summary.json
91
- * @param areaFilter - Optional area names to include (all if empty)
92
- */
93
- export function generateDiscoveryReport(summary, areaFilter) {
94
- const metrics = summary.retrievalMetrics;
95
- if (!metrics) {
96
- throw new Error("score-summary.json does not contain retrievalMetrics. " +
97
- "Run an agentic evaluation first: pnpm pipeline -- --mode agentic");
98
- }
99
- // Apply area filter
100
- const areas = areaFilter && areaFilter.length > 0
101
- ? metrics.areas.filter((a) => areaFilter.includes(a.area))
102
- : metrics.areas;
103
- if (areaFilter && areaFilter.length > 0 && areas.length === 0) {
104
- throw new Error(`No retrieval data found for area(s): ${areaFilter.join(", ")}. ` +
105
- `Available areas: ${metrics.areas.map((a) => a.area).join(", ")}`);
106
- }
107
- // Recompute overall metrics for filtered areas
108
- const overall = areas.length === metrics.areas.length
109
- ? metrics.overall
110
- : computeOverall(areas);
111
- // Build invisible docs list with task references
112
- const invisibleDocs = buildInvisibleDocs(areas);
113
- // Compute totals for the summary table
114
- const allTasks = areas.flatMap((a) => a.tasks);
115
- const allExpected = new Set(allTasks.flatMap((t) => t.expected));
116
- const allHits = new Set(allTasks.flatMap((t) => t.hits));
117
- const totalCanonicalDocs = allExpected.size;
118
- const totalHits = allHits.size;
119
- // Generate recommendations
120
- const recommendations = generateRecommendations(invisibleDocs, areas, overall);
121
- return {
122
- areas,
123
- baseUrl: summary.source?.baseUrl,
124
- invisibleDocs,
125
- overall,
126
- recommendations,
127
- timestamp: summary.timestamp,
128
- totalCanonicalDocs,
129
- totalHits,
130
- };
131
- }
132
- // ---------------------------------------------------------------------------
133
- // Helpers (alphabetical for perfectionist/sort-modules)
134
- // ---------------------------------------------------------------------------
135
- function buildInvisibleDocs(areas) {
136
- // Collect all invisible slugs and map them to the tasks that reference them
137
- const slugToTasks = new Map();
138
- for (const area of areas) {
139
- for (const task of area.tasks) {
140
- for (const slug of task.missed) {
141
- // Check if this slug is globally invisible (never retrieved by ANY task)
142
- const isGloballyInvisible = areas.every((a) => a.tasks.every((t) => !t.retrieved.includes(slug)));
143
- if (isGloballyInvisible) {
144
- if (!slugToTasks.has(slug)) {
145
- slugToTasks.set(slug, new Set());
146
- }
147
- slugToTasks.get(slug).add(task.taskId);
148
- }
149
- }
150
- }
151
- }
152
- return [...slugToTasks.entries()]
153
- .map(([slug, tasks]) => ({
154
- referencedBy: [...tasks].sort(),
155
- slug,
156
- }))
157
- .sort((a, b) => b.referencedBy.length - a.referencedBy.length);
158
- }
159
- function computeOverall(areas) {
160
- if (areas.length === 0) {
161
- return { avgF1: 0, avgPrecision: 0, avgRecall: 0 };
162
- }
163
- // Weight by task count for fair averaging
164
- const totalTasks = areas.reduce((s, a) => s + a.taskCount, 0);
165
- if (totalTasks === 0) {
166
- return { avgF1: 0, avgPrecision: 0, avgRecall: 0 };
167
- }
168
- const avgRecall = areas.reduce((s, a) => s + a.avgRecall * a.taskCount, 0) / totalTasks;
169
- const avgPrecision = areas.reduce((s, a) => s + a.avgPrecision * a.taskCount, 0) / totalTasks;
170
- const avgF1 = areas.reduce((s, a) => s + a.avgF1 * a.taskCount, 0) / totalTasks;
171
- return { avgF1, avgPrecision, avgRecall };
172
- }
173
- function generateRecommendations(invisibleDocs, areas, overall) {
174
- const recs = [];
175
- // Recommend adding invisible docs to llms.txt
176
- const highImpactInvisible = invisibleDocs.filter((d) => d.referencedBy.length > 0);
177
- for (const doc of highImpactInvisible.slice(0, 5)) {
178
- const taskWord = doc.referencedBy.length === 1 ? "task" : "tasks";
179
- recs.push(`Add \`${doc.slug}\` to llms.txt (referenced by ${doc.referencedBy.length} ${taskWord})`);
180
- }
181
- // Recommend cross-linking for invisible docs
182
- if (invisibleDocs.length > 0) {
183
- recs.push(`Improve cross-linking to ${invisibleDocs.length} invisible document${invisibleDocs.length === 1 ? "" : "s"}`);
184
- }
185
- // Flag low-recall areas
186
- const lowRecallAreas = areas.filter((a) => a.avgRecall < 0.5);
187
- for (const area of lowRecallAreas) {
188
- recs.push(`Investigate low recall in \`${area.area}\` (${pct(area.avgRecall)}) — agents miss most canonical docs`);
189
- }
190
- // Flag low-precision areas
191
- const lowPrecisionAreas = areas.filter((a) => a.avgPrecision < 0.5);
192
- for (const area of lowPrecisionAreas) {
193
- recs.push(`Review search relevance for \`${area.area}\` (precision ${pct(area.avgPrecision)}) — agents fetch many irrelevant docs`);
194
- }
195
- // Overall recommendation
196
- if (overall.avgF1 < 0.6) {
197
- recs.push("Overall F1 is below 0.60 — consider a documentation restructure for agent accessibility");
198
- }
199
- return recs;
200
- }
201
- function main() {
202
- const { areaFilter, output, summaryPath } = parseArgs(process.argv);
203
- if (!existsSync(summaryPath)) {
204
- console.error(`❌ Score summary not found: ${summaryPath}`);
205
- console.error("Run an agentic evaluation first: pnpm pipeline -- --mode agentic");
206
- process.exit(1);
207
- }
208
- const summary = JSON.parse(readFileSync(summaryPath, "utf-8"));
209
- const report = generateDiscoveryReport(summary, areaFilter.length > 0 ? areaFilter : undefined);
210
- const markdown = formatDiscoveryMarkdown(report);
211
- if (output) {
212
- writeFileSync(output, markdown, "utf-8");
213
- console.log(`✅ Discovery report written to ${output}`);
214
- }
215
- else {
216
- console.log(markdown);
217
- }
218
- }
219
- function parseArgs(argv) {
220
- const args = argv.slice(2);
221
- let output;
222
- const areaFilter = [];
223
- let summaryPath = join(ROOT, "results", "latest", "score-summary.json");
224
- for (let i = 0; i < args.length; i++) {
225
- if (args[i] === "--output" && args[i + 1]) {
226
- output = args[++i];
227
- }
228
- else if (args[i] === "--area" && args[i + 1]) {
229
- areaFilter.push(...args[++i].split(","));
230
- }
231
- else if (args[i] === "--input" && args[i + 1]) {
232
- summaryPath = args[++i];
233
- }
234
- else if (!args[i].startsWith("-")) {
235
- summaryPath = args[i];
236
- }
237
- }
238
- return { areaFilter, output, summaryPath };
239
- }
240
- function pct(value) {
241
- return `${Math.round(value * 100)}%`;
242
- }
243
- function sortedAreas(areas) {
244
- return [...areas].sort((a, b) => a.area.localeCompare(b.area));
245
- }
246
- // Only run when invoked directly
247
- if (process.argv[1]?.endsWith("discovery-report.ts") ||
248
- process.argv[1]?.endsWith("discovery-report.js")) {
249
- main();
250
- }
@@ -1,35 +0,0 @@
1
- /**
2
- * Fetch-docs.ts
3
- *
4
- * Pulls documentation from the Sanity CMS and generates markdown context
5
- * files for use in Promptfoo evaluations. Always produces canonical contexts;
6
- * other outputs are opt-in:
7
- *
8
- * 1. Canonical contexts — one file per evaluation task, containing
9
- * only the manually-annotated "gold" documents for that task (always)
10
- * 2. Feature-area contexts — one file per GROQ feature area query
11
- * (opt-in via --include-feature-areas)
12
- * 3. Full corpus — all articles in one file
13
- * (opt-in via --include-corpus)
14
- */
15
- import "dotenv/config";
16
- /**
17
- * Result of resolving --sanity-document IDs against canonical docs.
18
- *
19
- * Documents specified by ID either replace a canonical doc (if the fetched
20
- * document's slug matches one in the canonical set) or are appended as
21
- * additional context (if the slug is not in the canonical set).
22
- */
23
- export interface DocumentOverlay {
24
- /** Extra formatted content for docs that don't match any canonical slug */
25
- appendedContent: string[];
26
- /** Map from canonical slug → formatted content (replaces the normal fetch) */
27
- replacements: Map<string, string>;
28
- }
29
- /** Result of comparing canonical docs between published and perspective */
30
- export interface ReleaseImpact {
31
- added: string[];
32
- modified: string[];
33
- removed: string[];
34
- unchanged: string[];
35
- }