@sanity/ailf 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. package/LICENSE +21 -0
  2. package/dist/cli.js +0 -0
  3. package/package.json +24 -24
  4. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +0 -10
  5. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +0 -185
  6. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +0 -6
  7. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +0 -42
  8. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  9. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  10. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  11. package/dist/_vendor/ailf-tasks/index.js +0 -16
  12. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  13. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  14. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  15. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  16. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  17. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  18. package/dist/adapters/task-sources/yaml-task-source.d.ts +0 -18
  19. package/dist/adapters/task-sources/yaml-task-source.js +0 -139
  20. package/dist/agent-observer/test-imports.d.ts +0 -7
  21. package/dist/agent-observer/test-imports.js +0 -185
  22. package/dist/commands/update-quality-scores.d.ts +0 -5
  23. package/dist/commands/update-quality-scores.js +0 -20
  24. package/dist/lib/agent-behavior-report.d.ts +0 -8
  25. package/dist/lib/agent-behavior-report.js +0 -185
  26. package/dist/lib/baseline.d.ts +0 -19
  27. package/dist/lib/baseline.js +0 -153
  28. package/dist/lib/calculate-scores.d.ts +0 -23
  29. package/dist/lib/calculate-scores.js +0 -42
  30. package/dist/lib/compare.d.ts +0 -18
  31. package/dist/lib/compare.js +0 -170
  32. package/dist/lib/coverage-audit.d.ts +0 -4
  33. package/dist/lib/coverage-audit.js +0 -42
  34. package/dist/lib/discovery-report.d.ts +0 -13
  35. package/dist/lib/discovery-report.js +0 -57
  36. package/dist/lib/fetch-docs.d.ts +0 -30
  37. package/dist/lib/fetch-docs.js +0 -171
  38. package/dist/lib/generate-configs.d.ts +0 -25
  39. package/dist/lib/generate-configs.js +0 -42
  40. package/dist/lib/grader-api.d.ts +0 -21
  41. package/dist/lib/grader-api.js +0 -34
  42. package/dist/lib/grader-compare.d.ts +0 -19
  43. package/dist/lib/grader-compare.js +0 -91
  44. package/dist/lib/grader-consistency.d.ts +0 -27
  45. package/dist/lib/grader-consistency.js +0 -79
  46. package/dist/lib/grader-sensitivity.d.ts +0 -19
  47. package/dist/lib/grader-sensitivity.js +0 -75
  48. package/dist/lib/grader-validate.d.ts +0 -19
  49. package/dist/lib/grader-validate.js +0 -78
  50. package/dist/lib/measure-retrieval.d.ts +0 -14
  51. package/dist/lib/measure-retrieval.js +0 -71
  52. package/dist/lib/pr-comment.d.ts +0 -16
  53. package/dist/lib/pr-comment.js +0 -28
  54. package/dist/lib/readiness-report.d.ts +0 -13
  55. package/dist/lib/readiness-report.js +0 -108
  56. package/dist/lib/webhook-server.d.ts +0 -11
  57. package/dist/lib/webhook-server.js +0 -24
  58. package/dist/lib/weekly-digest.d.ts +0 -24
  59. package/dist/lib/weekly-digest.js +0 -148
  60. package/dist/orchestration/env-bridge.d.ts +0 -21
  61. package/dist/orchestration/env-bridge.js +0 -66
  62. package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
  63. package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
  64. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +0 -9
  65. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +0 -339
  66. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +0 -70
  67. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
  68. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
  69. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
  70. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
  71. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
  72. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +0 -50
  73. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +0 -334
  74. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -69
  75. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -307
  76. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +0 -65
  77. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +0 -368
  78. package/dist/pipeline/compiler/task-bridge.d.ts +0 -41
  79. package/dist/pipeline/compiler/task-bridge.js +0 -92
  80. package/dist/pipeline/expand-tasks.d.ts +0 -232
  81. package/dist/pipeline/expand-tasks.js +0 -467
  82. package/dist/pipeline/generate-configs.d.ts +0 -92
  83. package/dist/pipeline/generate-configs.js +0 -445
  84. package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
  85. package/dist/pipeline/steps/calculate-scores-step.js +0 -89
  86. package/dist/pipeline/steps/compare-step.d.ts +0 -18
  87. package/dist/pipeline/steps/compare-step.js +0 -90
  88. package/dist/pipeline/steps/eval-step.d.ts +0 -53
  89. package/dist/pipeline/steps/eval-step.js +0 -347
  90. package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
  91. package/dist/pipeline/steps/fetch-docs-step.js +0 -84
  92. package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
  93. package/dist/pipeline/steps/generate-configs-step.js +0 -98
  94. package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
  95. package/dist/pipeline/steps/grader-consistency-step.js +0 -74
  96. package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
  97. package/dist/pipeline/steps/publish-report-step.js +0 -243
  98. package/dist/pipeline/steps/report-step.d.ts +0 -13
  99. package/dist/pipeline/steps/report-step.js +0 -56
  100. package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
  101. package/dist/pipeline/steps/update-scores-step.js +0 -42
  102. package/dist/scripts/agent-behavior-report.d.ts +0 -19
  103. package/dist/scripts/agent-behavior-report.js +0 -315
  104. package/dist/scripts/baseline.d.ts +0 -43
  105. package/dist/scripts/baseline.js +0 -267
  106. package/dist/scripts/calculate-scores.d.ts +0 -166
  107. package/dist/scripts/calculate-scores.js +0 -1296
  108. package/dist/scripts/compare.d.ts +0 -22
  109. package/dist/scripts/compare.js +0 -334
  110. package/dist/scripts/coverage-audit.d.ts +0 -44
  111. package/dist/scripts/coverage-audit.js +0 -209
  112. package/dist/scripts/debug-eval.d.ts +0 -19
  113. package/dist/scripts/debug-eval.js +0 -73
  114. package/dist/scripts/discovery-report.d.ts +0 -58
  115. package/dist/scripts/discovery-report.js +0 -250
  116. package/dist/scripts/fetch-docs.d.ts +0 -35
  117. package/dist/scripts/fetch-docs.js +0 -472
  118. package/dist/scripts/generate-configs.d.ts +0 -66
  119. package/dist/scripts/generate-configs.js +0 -459
  120. package/dist/scripts/grader-api.d.ts +0 -27
  121. package/dist/scripts/grader-api.js +0 -206
  122. package/dist/scripts/grader-compare.d.ts +0 -22
  123. package/dist/scripts/grader-compare.js +0 -368
  124. package/dist/scripts/grader-consistency.d.ts +0 -20
  125. package/dist/scripts/grader-consistency.js +0 -313
  126. package/dist/scripts/grader-sensitivity.d.ts +0 -22
  127. package/dist/scripts/grader-sensitivity.js +0 -354
  128. package/dist/scripts/grader-validate.d.ts +0 -19
  129. package/dist/scripts/grader-validate.js +0 -267
  130. package/dist/scripts/measure-retrieval.d.ts +0 -10
  131. package/dist/scripts/measure-retrieval.js +0 -145
  132. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +0 -24
  133. package/dist/scripts/migrate-tasks-to-content-lake.js +0 -328
  134. package/dist/scripts/pipeline.d.ts +0 -76
  135. package/dist/scripts/pipeline.js +0 -1031
  136. package/dist/scripts/pr-comment.d.ts +0 -10
  137. package/dist/scripts/pr-comment.js +0 -510
  138. package/dist/scripts/readiness-report.d.ts +0 -88
  139. package/dist/scripts/readiness-report.js +0 -342
  140. package/dist/scripts/update-quality-scores.d.ts +0 -15
  141. package/dist/scripts/update-quality-scores.js +0 -184
  142. package/dist/scripts/validate-task-sources.d.ts +0 -21
  143. package/dist/scripts/validate-task-sources.js +0 -210
  144. package/dist/scripts/validate.d.ts +0 -13
  145. package/dist/scripts/validate.js +0 -79
  146. package/dist/scripts/webhook-server.d.ts +0 -26
  147. package/dist/scripts/webhook-server.js +0 -147
  148. package/dist/scripts/weekly-digest.d.ts +0 -24
  149. package/dist/scripts/weekly-digest.js +0 -144
  150. package/dist/sinks/format-slack.d.ts +0 -64
  151. package/dist/sinks/format-slack.js +0 -306
  152. package/dist/sinks/slack-sink.d.ts +0 -27
  153. package/dist/sinks/slack-sink.js +0 -78
  154. package/dist/sinks/webhook-sink.d.ts +0 -19
  155. package/dist/sinks/webhook-sink.js +0 -50
  156. package/tasks/.expanded.agentic.yaml +0 -280
  157. package/tasks/.expanded.yaml +0 -565
@@ -1,328 +0,0 @@
1
- #!/usr/bin/env tsx
2
- /**
3
- * Migration script: YAML tasks → Content Lake documents
4
- *
5
- * Reads all existing task definitions from tasks/*.yaml via YamlTaskSource,
6
- * reference solution files from canonical/reference-solutions/, and creates
7
- * corresponding ailf.featureArea, ailf.referenceSolution, and ailf.task
8
- * documents in the Sanity Content Lake.
9
- *
10
- * Idempotent — uses createOrReplace with deterministic document IDs.
11
- * Safe to run multiple times during development/testing.
12
- *
13
- * Usage:
14
- * cd packages/eval
15
- * npx tsx src/scripts/migrate-tasks-to-content-lake.ts
16
- * npx tsx src/scripts/migrate-tasks-to-content-lake.ts --dry-run
17
- *
18
- * Prerequisites:
19
- * - SANITY_API_TOKEN (or AILF_REPORT_SANITY_API_TOKEN) with write access
20
- * - SANITY_PROJECT_ID and SANITY_DATASET configured
21
- *
22
- * @see docs/archive/exec-plans/tasks-as-content/phase-3-migration.md
23
- */
24
- import { config as dotenvConfig } from "dotenv";
25
- import { existsSync, readFileSync } from "fs";
26
- import { dirname, extname, resolve } from "path";
27
- import { fileURLToPath } from "url";
28
- import { isSlugRef, } from "../_vendor/ailf-core/index.js";
29
- import { getSanityClient } from "../sanity/client.js";
30
- import { YamlTaskSource } from "../adapters/task-sources/yaml-task-source.js";
31
- const __dirname = dirname(fileURLToPath(import.meta.url));
32
- const ROOT = resolve(__dirname, "..", "..");
33
- // Load .env from repository root (same as CLI entry point)
34
- const envPath = resolve(ROOT, "..", "..", ".env");
35
- if (existsSync(envPath)) {
36
- dotenvConfig({ override: true, path: envPath });
37
- }
38
- // ---------------------------------------------------------------------------
39
- // Configuration
40
- // ---------------------------------------------------------------------------
41
- const DRY_RUN = process.argv.includes("--dry-run");
42
- /** Human-readable descriptions for feature areas (from YAML filenames). */
43
- const AREA_DESCRIPTIONS = {
44
- frameworks: "Framework Integrations",
45
- functions: "Sanity Functions",
46
- groq: "GROQ Query Language",
47
- "nextjs-live": "Next.js Integration",
48
- "studio-setup": "Studio Setup & Customization",
49
- "visual-editing": "Visual Editing",
50
- };
51
- // ---------------------------------------------------------------------------
52
- // Document ID conventions (deterministic for idempotent createOrReplace)
53
- // ---------------------------------------------------------------------------
54
- function featureAreaDocId(areaId) {
55
- return `ailf.featureArea.${areaId}`;
56
- }
57
- function referenceSolutionDocId(taskId) {
58
- return `ailf.referenceSolution.${taskId}`;
59
- }
60
- function taskDocId(taskId) {
61
- return `ailf.task.${taskId}`;
62
- }
63
- // ---------------------------------------------------------------------------
64
- // Portable Text helpers
65
- // ---------------------------------------------------------------------------
66
- /** Generate a simple random key for Portable Text array items. */
67
- function ptKey() {
68
- return Math.random().toString(36).slice(2, 14);
69
- }
70
- /**
71
- * Convert a source code file to a Portable Text array with a single code block.
72
- * This is the most faithful representation — one code block per file.
73
- */
74
- function sourceToPortableText(code, language) {
75
- return [
76
- {
77
- _key: ptKey(),
78
- _type: "code",
79
- code,
80
- language,
81
- },
82
- ];
83
- }
84
- /** Detect language from file extension. */
85
- function languageFromExt(filePath) {
86
- const ext = extname(filePath).toLowerCase();
87
- if (ext === ".ts" || ext === ".tsx")
88
- return "typescript";
89
- if (ext === ".js" || ext === ".jsx")
90
- return "javascript";
91
- return "typescript"; // default
92
- }
93
- // ---------------------------------------------------------------------------
94
- // Slug → article _id resolution
95
- // ---------------------------------------------------------------------------
96
- /**
97
- * Resolve canonical doc slugs to Sanity article document IDs.
98
- * Returns a map of slug → _id. Unresolved slugs are logged as warnings.
99
- */
100
- async function resolveCanonicalDocIds(client, slugs) {
101
- if (slugs.length === 0)
102
- return new Map();
103
- const query = /* groq */ `
104
- *[_type == "article" && slug.current in $slugs] {
105
- _id,
106
- "slug": slug.current
107
- }
108
- `;
109
- const results = await client.fetch(query, {
110
- slugs,
111
- });
112
- const map = new Map();
113
- for (const r of results) {
114
- map.set(r.slug, r._id);
115
- }
116
- // Log unresolved slugs
117
- for (const slug of slugs) {
118
- if (!map.has(slug)) {
119
- console.warn(` ⚠️ Canonical doc slug "${slug}" could not be resolved`);
120
- }
121
- }
122
- return map;
123
- }
124
- // ---------------------------------------------------------------------------
125
- // Document builders
126
- // ---------------------------------------------------------------------------
127
- function buildFeatureAreaDoc(areaId) {
128
- return {
129
- _id: featureAreaDocId(areaId),
130
- _type: "ailf.featureArea",
131
- areaId: { _type: "slug", current: areaId },
132
- description: AREA_DESCRIPTIONS[areaId] ?? areaId,
133
- };
134
- }
135
- function buildReferenceSolutionDoc(task, code, language) {
136
- return {
137
- _id: referenceSolutionDocId(task.id),
138
- _type: "ailf.referenceSolution",
139
- content: sourceToPortableText(code, language),
140
- language,
141
- title: `${task.title} — Reference Solution`,
142
- };
143
- }
144
- function buildTaskDoc(task, slugToDocId, hasReferenceSolution) {
145
- // Build canonical docs array with resolved references (slug refs only)
146
- const canonicalDocs = (task.context?.docs ?? []).map((ref) => {
147
- const resolvedId = isSlugRef(ref) ? slugToDocId.get(ref.slug) : undefined;
148
- return {
149
- _key: ptKey(),
150
- ...(resolvedId ? { doc: { _ref: resolvedId, _type: "reference" } } : {}),
151
- reason: ref.reason ?? "",
152
- };
153
- });
154
- // Build assertions array
155
- const assertArray = (task.assertions ?? []).map((a) => {
156
- const entry = {
157
- _key: ptKey(),
158
- type: a.type,
159
- };
160
- if (a.type === "llm-rubric" && "template" in a) {
161
- entry.template = a.template;
162
- if ("criteria" in a && Array.isArray(a.criteria)) {
163
- entry.criteria = a.criteria;
164
- }
165
- }
166
- if ("value" in a && a.value !== undefined) {
167
- // Store value as a string (matching the Studio schema text field)
168
- entry.value =
169
- typeof a.value === "string" ? a.value : JSON.stringify(a.value);
170
- }
171
- if ("threshold" in a && a.threshold !== undefined) {
172
- entry.threshold = a.threshold;
173
- }
174
- if (a.weight !== undefined) {
175
- entry.weight = a.weight;
176
- }
177
- return entry;
178
- });
179
- const area = task.area ?? "";
180
- const doc = {
181
- _id: taskDocId(task.id),
182
- _type: "ailf.task",
183
- assert: assertArray,
184
- canonicalDocs,
185
- description: task.title,
186
- docCoverage: task.docCoverage ?? false,
187
- featureArea: {
188
- _ref: featureAreaDocId(area),
189
- _type: "reference",
190
- },
191
- id: { _type: "slug", current: task.id },
192
- taskPrompt: task.prompt?.text ?? "",
193
- };
194
- // Optional reference solution
195
- if (hasReferenceSolution) {
196
- doc.referenceSolution = {
197
- _ref: referenceSolutionDocId(task.id),
198
- _type: "reference",
199
- };
200
- }
201
- // Optional baseline config
202
- if (task.baseline) {
203
- doc.baseline = {
204
- ...(task.baseline.enabled !== undefined
205
- ? { enabled: task.baseline.enabled }
206
- : {}),
207
- ...(task.baseline.rubric ? { rubric: task.baseline.rubric } : {}),
208
- };
209
- }
210
- return doc;
211
- }
212
- // ---------------------------------------------------------------------------
213
- // Main migration
214
- // ---------------------------------------------------------------------------
215
- async function migrate() {
216
- console.log("\n📦 Tasks-as-Content Migration");
217
- console.log("=".repeat(50));
218
- if (DRY_RUN) {
219
- console.log(" Mode: DRY RUN (no writes)");
220
- }
221
- // 1. Load all tasks from YAML
222
- console.log("\n1️⃣ Loading tasks from YAML...");
223
- const taskSource = new YamlTaskSource(ROOT);
224
- const tasks = (await taskSource.loadTasks()).filter((t) => t.mode === "literacy");
225
- console.log(` Loaded ${tasks.length} tasks`);
226
- // 2. Extract unique feature areas
227
- const areas = [...new Set(tasks.map((t) => t.area ?? ""))].sort();
228
- console.log(` Found ${areas.length} feature areas: ${areas.join(", ")}`);
229
- // 3. Collect all canonical doc slugs for batch resolution (slug refs only)
230
- const allSlugs = [
231
- ...new Set(tasks.flatMap((t) => (t.context?.docs ?? []).filter(isSlugRef).map((d) => d.slug))),
232
- ];
233
- console.log(` Found ${allSlugs.length} unique canonical doc slugs`);
234
- // 4. Create Sanity client with write token
235
- // Use the report token (which has write access) but target the content
236
- // dataset (where articles live, so canonicalDocs references resolve).
237
- const writeToken = process.env.AILF_REPORT_SANITY_API_TOKEN ?? process.env.SANITY_API_TOKEN;
238
- if (!writeToken && !DRY_RUN) {
239
- console.error(" ❌ No write token found. Set AILF_REPORT_SANITY_API_TOKEN or SANITY_API_TOKEN.");
240
- process.exit(1);
241
- }
242
- const client = getSanityClient({
243
- ...(writeToken ? { token: writeToken } : {}),
244
- });
245
- console.log(` Sanity: project=${client.config().projectId} dataset=${client.config().dataset}`);
246
- // 5. Resolve canonical doc slugs → article _ids
247
- console.log("\n2️⃣ Resolving canonical doc slugs...");
248
- const slugToDocId = await resolveCanonicalDocIds(client, allSlugs);
249
- const resolved = slugToDocId.size;
250
- const unresolved = allSlugs.length - resolved;
251
- console.log(` Resolved: ${resolved}/${allSlugs.length}${unresolved > 0 ? ` (${unresolved} unresolved)` : ""}`);
252
- // 6. Build all documents
253
- console.log("\n3️⃣ Building documents...");
254
- // Feature areas
255
- const areaDocs = areas.map((a) => buildFeatureAreaDoc(a));
256
- console.log(` ${areaDocs.length} ailf.featureArea documents`);
257
- // Reference solutions
258
- const refSolutionDocs = [];
259
- const tasksWithSolutions = new Set();
260
- for (const task of tasks) {
261
- if (!task.referenceSolution)
262
- continue;
263
- const filePath = resolve(ROOT, "canonical", task.referenceSolution);
264
- try {
265
- const code = readFileSync(filePath, "utf-8");
266
- const language = languageFromExt(filePath);
267
- refSolutionDocs.push(buildReferenceSolutionDoc(task, code, language));
268
- tasksWithSolutions.add(task.id);
269
- }
270
- catch (err) {
271
- console.warn(` ⚠️ Could not read reference solution for ${task.id}: ${filePath}`);
272
- if (err instanceof Error) {
273
- console.warn(` ${err.message}`);
274
- }
275
- }
276
- }
277
- console.log(` ${refSolutionDocs.length} ailf.referenceSolution documents`);
278
- // Tasks
279
- const taskDocs = tasks.map((t) => buildTaskDoc(t, slugToDocId, tasksWithSolutions.has(t.id)));
280
- console.log(` ${taskDocs.length} ailf.task documents`);
281
- // 7. Write to Content Lake
282
- const allDocs = [...areaDocs, ...refSolutionDocs, ...taskDocs];
283
- console.log(`\n4️⃣ Writing ${allDocs.length} documents...`);
284
- if (DRY_RUN) {
285
- console.log(" DRY RUN — skipping writes");
286
- console.log("\n Documents that would be created:");
287
- for (const doc of allDocs) {
288
- console.log(` ${doc._type} → ${doc._id}`);
289
- }
290
- }
291
- else {
292
- // Use a transaction for atomicity
293
- let tx = client.transaction();
294
- for (const doc of allDocs) {
295
- tx = tx.createOrReplace(doc);
296
- }
297
- try {
298
- const result = await tx.commit();
299
- console.log(` ✅ Successfully wrote ${result.documentIds.length} documents`);
300
- }
301
- catch (err) {
302
- console.error(" ❌ Transaction failed:");
303
- if (err instanceof Error) {
304
- console.error(` ${err.message}`);
305
- }
306
- process.exit(1);
307
- }
308
- }
309
- // 8. Summary
310
- console.log("\n" + "=".repeat(50));
311
- console.log("📊 Migration Summary");
312
- console.log(` Feature areas: ${areaDocs.length}`);
313
- console.log(` Reference solutions: ${refSolutionDocs.length}`);
314
- console.log(` Tasks: ${taskDocs.length}`);
315
- console.log(` Total documents: ${allDocs.length}`);
316
- console.log(` Canonical slugs: ${resolved} resolved, ${unresolved} unresolved`);
317
- if (DRY_RUN) {
318
- console.log("\n ℹ️ Run without --dry-run to write to the Content Lake");
319
- }
320
- console.log("");
321
- }
322
- // ---------------------------------------------------------------------------
323
- // Run
324
- // ---------------------------------------------------------------------------
325
- migrate().catch((err) => {
326
- console.error("Migration failed:", err);
327
- process.exit(1);
328
- });
@@ -1,76 +0,0 @@
1
- /**
2
- * pipeline.ts
3
- *
4
- * CLI orchestrator for the modular evaluation pipeline.
5
- * Runs steps in sequence with validation between each.
6
- *
7
- * This is the single entry point for both local and CI evaluation.
8
- * The CI workflow (eval.yml) calls this script, then layers on
9
- * CI-specific post-steps (PR comment posting, artifact upload).
10
- *
11
- * Usage:
12
- * pnpm pipeline # full baseline pipeline
13
- * pnpm pipeline --dry-run # validate only, no execution
14
- * pnpm pipeline --skip-fetch # reuse cached doc contexts
15
- * pnpm pipeline --skip-eval # recalculate from existing results
16
- * pnpm pipeline --mode agentic # run agentic pipeline
17
- * pnpm pipeline --mode observed # run observed pipeline
18
- * pnpm pipeline --source staging # use staging doc source
19
- * pnpm pipeline --debug # run first 2 tests only (fast)
20
- * pnpm pipeline --debug-n 5 # run first 5 tests
21
- * pnpm pipeline --debug-pattern "Blog" # filter by description
22
- * pnpm pipeline --debug-sample 3 # random sample of 3 tests
23
- * pnpm pipeline --no-cache # bypass caching, force re-run
24
- * pnpm pipeline --concurrency 64 # override max parallel API calls
25
- * pnpm pipeline --area groq,frameworks # only evaluate these areas
26
- * pnpm pipeline --task groq-blog-queries # only evaluate this task
27
- * pnpm pipeline --changed-docs groq-introduction,how-queries-work
28
- * # auto-scope to affected tasks
29
- * pnpm pipeline --url https://... # override docs base URL
30
- * pnpm pipeline --sanity-dataset staging # override Sanity dataset
31
- * pnpm pipeline --sanity-project abc123 # override Sanity project ID
32
- * pnpm pipeline --sanity-perspective agent-c7OKTk
33
- * # evaluate a Sanity release
34
- * pnpm pipeline --sanity-document <uuid>
35
- * # evaluate specific document(s)
36
- * pnpm pipeline --sanity-document <uuid> --sanity-documents <uuid>
37
- * # singular and plural aliases work
38
- * pnpm pipeline --header "X-Vercel-Protection-Bypass: <secret>"
39
- * # custom HTTP header (repeatable)
40
- * pnpm pipeline --allowed-origin my-branch.sanity.build
41
- * # sandbox agent to this origin
42
- * pnpm pipeline --before published # run before/after impact evaluation
43
- * pnpm pipeline --before production # before = production source
44
- * pnpm pipeline --before results/baselines/20260310.json # use existing scores
45
- * pnpm pipeline --before latest-baseline # use most recent baseline
46
- * pnpm pipeline --compare # compare scores against latest baseline
47
- * pnpm pipeline --compare --compare-baseline <path> # compare against specific file
48
- * pnpm pipeline --compare --threshold 5 # noise threshold for unchanged (default: 2)
49
- * pnpm pipeline --output /tmp/report.md # write report to specific path
50
- * pnpm pipeline --promptfoo-url <url> # include Promptfoo URL in report
51
- * pnpm pipeline --gap-analysis # run failure mode + impact analysis
52
- * pnpm pipeline --publish # write report to Sanity + fan out to sinks
53
- * pnpm pipeline --publish --publish-tag "daily-2026-03-11" # tag the report
54
- * pnpm pipeline --publish --report-dataset ailf-reports # report store dataset
55
- * pnpm pipeline --publish --report-project abc123 # report store project
56
- *
57
- * Override precedence (highest wins):
58
- * CLI flag (--url, --sanity-dataset, --sanity-project, --allowed-origin)
59
- * → Environment variable (DOC_BASE_URL, SANITY_DATASET, SANITY_PROJECT_ID, DOC_ALLOWED_ORIGIN)
60
- * → config/sources.yaml default value
61
- *
62
- * --header flags are additive and do not override env vars — they are
63
- * always merged with any headers defined in DOC_HEADERS env var.
64
- *
65
- * Environment variable fallbacks (for CI):
66
- * DEBUG_EVAL=1 → --debug
67
- * DEBUG_EVAL_N=2 → --debug-n 2
68
- * DEBUG_EVAL_PATTERN → --debug-pattern
69
- * DEBUG_EVAL_SAMPLE → --debug-sample
70
- * EVAL_FILTER_AREAS → --area
71
- * EVAL_FILTER_TASKS → --task
72
- * EVAL_CHANGED_DOCS → --changed-docs
73
- * AILF_REPORT_DATASET → --report-dataset (report store, not eval)
74
- * AILF_REPORT_PROJECT_ID → --report-project (report store, not eval)
75
- */
76
- export {};