@sanity/ailf 2.0.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. package/LICENSE +21 -0
  2. package/dist/_vendor/ailf-core/examples/index.d.ts +50 -1
  3. package/dist/_vendor/ailf-core/examples/index.js +66 -1
  4. package/dist/agent-harness/assertions-runtime.d.ts +49 -0
  5. package/dist/agent-harness/assertions-runtime.js +138 -0
  6. package/dist/agent-harness/provider.d.ts +58 -0
  7. package/dist/agent-harness/provider.js +104 -0
  8. package/dist/cli.js +0 -0
  9. package/dist/commands/init.js +3 -0
  10. package/dist/orchestration/steps/generate-configs-step.d.ts +7 -0
  11. package/dist/orchestration/steps/generate-configs-step.js +35 -2
  12. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +39 -25
  13. package/dist/pipeline/compiler/compiler-to-yaml.js +78 -7
  14. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +9 -0
  15. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +28 -85
  16. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +22 -15
  17. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +8 -1
  18. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +42 -12
  19. package/package.json +25 -24
  20. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +0 -10
  21. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +0 -185
  22. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +0 -6
  23. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +0 -42
  24. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  25. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  26. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  27. package/dist/_vendor/ailf-tasks/index.js +0 -16
  28. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  29. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  30. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  31. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  32. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  33. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  34. package/dist/adapters/task-sources/yaml-task-source.d.ts +0 -18
  35. package/dist/adapters/task-sources/yaml-task-source.js +0 -139
  36. package/dist/agent-observer/test-imports.d.ts +0 -7
  37. package/dist/agent-observer/test-imports.js +0 -185
  38. package/dist/commands/update-quality-scores.d.ts +0 -5
  39. package/dist/commands/update-quality-scores.js +0 -20
  40. package/dist/lib/agent-behavior-report.d.ts +0 -8
  41. package/dist/lib/agent-behavior-report.js +0 -185
  42. package/dist/lib/baseline.d.ts +0 -19
  43. package/dist/lib/baseline.js +0 -153
  44. package/dist/lib/calculate-scores.d.ts +0 -23
  45. package/dist/lib/calculate-scores.js +0 -42
  46. package/dist/lib/compare.d.ts +0 -18
  47. package/dist/lib/compare.js +0 -170
  48. package/dist/lib/coverage-audit.d.ts +0 -4
  49. package/dist/lib/coverage-audit.js +0 -42
  50. package/dist/lib/discovery-report.d.ts +0 -13
  51. package/dist/lib/discovery-report.js +0 -57
  52. package/dist/lib/fetch-docs.d.ts +0 -30
  53. package/dist/lib/fetch-docs.js +0 -171
  54. package/dist/lib/generate-configs.d.ts +0 -25
  55. package/dist/lib/generate-configs.js +0 -42
  56. package/dist/lib/grader-api.d.ts +0 -21
  57. package/dist/lib/grader-api.js +0 -34
  58. package/dist/lib/grader-compare.d.ts +0 -19
  59. package/dist/lib/grader-compare.js +0 -91
  60. package/dist/lib/grader-consistency.d.ts +0 -27
  61. package/dist/lib/grader-consistency.js +0 -79
  62. package/dist/lib/grader-sensitivity.d.ts +0 -19
  63. package/dist/lib/grader-sensitivity.js +0 -75
  64. package/dist/lib/grader-validate.d.ts +0 -19
  65. package/dist/lib/grader-validate.js +0 -78
  66. package/dist/lib/measure-retrieval.d.ts +0 -14
  67. package/dist/lib/measure-retrieval.js +0 -71
  68. package/dist/lib/pr-comment.d.ts +0 -16
  69. package/dist/lib/pr-comment.js +0 -28
  70. package/dist/lib/readiness-report.d.ts +0 -13
  71. package/dist/lib/readiness-report.js +0 -108
  72. package/dist/lib/webhook-server.d.ts +0 -11
  73. package/dist/lib/webhook-server.js +0 -24
  74. package/dist/lib/weekly-digest.d.ts +0 -24
  75. package/dist/lib/weekly-digest.js +0 -148
  76. package/dist/orchestration/env-bridge.d.ts +0 -21
  77. package/dist/orchestration/env-bridge.js +0 -66
  78. package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
  79. package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
  80. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +0 -9
  81. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +0 -339
  82. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +0 -70
  83. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
  84. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
  85. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
  86. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
  87. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
  88. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +0 -50
  89. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +0 -334
  90. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -69
  91. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -307
  92. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +0 -65
  93. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +0 -368
  94. package/dist/pipeline/compiler/task-bridge.d.ts +0 -41
  95. package/dist/pipeline/compiler/task-bridge.js +0 -92
  96. package/dist/pipeline/expand-tasks.d.ts +0 -232
  97. package/dist/pipeline/expand-tasks.js +0 -467
  98. package/dist/pipeline/generate-configs.d.ts +0 -92
  99. package/dist/pipeline/generate-configs.js +0 -445
  100. package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
  101. package/dist/pipeline/steps/calculate-scores-step.js +0 -89
  102. package/dist/pipeline/steps/compare-step.d.ts +0 -18
  103. package/dist/pipeline/steps/compare-step.js +0 -90
  104. package/dist/pipeline/steps/eval-step.d.ts +0 -53
  105. package/dist/pipeline/steps/eval-step.js +0 -347
  106. package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
  107. package/dist/pipeline/steps/fetch-docs-step.js +0 -84
  108. package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
  109. package/dist/pipeline/steps/generate-configs-step.js +0 -98
  110. package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
  111. package/dist/pipeline/steps/grader-consistency-step.js +0 -74
  112. package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
  113. package/dist/pipeline/steps/publish-report-step.js +0 -243
  114. package/dist/pipeline/steps/report-step.d.ts +0 -13
  115. package/dist/pipeline/steps/report-step.js +0 -56
  116. package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
  117. package/dist/pipeline/steps/update-scores-step.js +0 -42
  118. package/dist/scripts/agent-behavior-report.d.ts +0 -19
  119. package/dist/scripts/agent-behavior-report.js +0 -315
  120. package/dist/scripts/baseline.d.ts +0 -43
  121. package/dist/scripts/baseline.js +0 -267
  122. package/dist/scripts/calculate-scores.d.ts +0 -166
  123. package/dist/scripts/calculate-scores.js +0 -1296
  124. package/dist/scripts/compare.d.ts +0 -22
  125. package/dist/scripts/compare.js +0 -334
  126. package/dist/scripts/coverage-audit.d.ts +0 -44
  127. package/dist/scripts/coverage-audit.js +0 -209
  128. package/dist/scripts/debug-eval.d.ts +0 -19
  129. package/dist/scripts/debug-eval.js +0 -73
  130. package/dist/scripts/discovery-report.d.ts +0 -58
  131. package/dist/scripts/discovery-report.js +0 -250
  132. package/dist/scripts/fetch-docs.d.ts +0 -35
  133. package/dist/scripts/fetch-docs.js +0 -472
  134. package/dist/scripts/generate-configs.d.ts +0 -66
  135. package/dist/scripts/generate-configs.js +0 -459
  136. package/dist/scripts/grader-api.d.ts +0 -27
  137. package/dist/scripts/grader-api.js +0 -206
  138. package/dist/scripts/grader-compare.d.ts +0 -22
  139. package/dist/scripts/grader-compare.js +0 -368
  140. package/dist/scripts/grader-consistency.d.ts +0 -20
  141. package/dist/scripts/grader-consistency.js +0 -313
  142. package/dist/scripts/grader-sensitivity.d.ts +0 -22
  143. package/dist/scripts/grader-sensitivity.js +0 -354
  144. package/dist/scripts/grader-validate.d.ts +0 -19
  145. package/dist/scripts/grader-validate.js +0 -267
  146. package/dist/scripts/measure-retrieval.d.ts +0 -10
  147. package/dist/scripts/measure-retrieval.js +0 -145
  148. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +0 -24
  149. package/dist/scripts/migrate-tasks-to-content-lake.js +0 -328
  150. package/dist/scripts/pipeline.d.ts +0 -76
  151. package/dist/scripts/pipeline.js +0 -1031
  152. package/dist/scripts/pr-comment.d.ts +0 -10
  153. package/dist/scripts/pr-comment.js +0 -510
  154. package/dist/scripts/readiness-report.d.ts +0 -88
  155. package/dist/scripts/readiness-report.js +0 -342
  156. package/dist/scripts/update-quality-scores.d.ts +0 -15
  157. package/dist/scripts/update-quality-scores.js +0 -184
  158. package/dist/scripts/validate-task-sources.d.ts +0 -21
  159. package/dist/scripts/validate-task-sources.js +0 -210
  160. package/dist/scripts/validate.d.ts +0 -13
  161. package/dist/scripts/validate.js +0 -79
  162. package/dist/scripts/webhook-server.d.ts +0 -26
  163. package/dist/scripts/webhook-server.js +0 -147
  164. package/dist/scripts/weekly-digest.d.ts +0 -24
  165. package/dist/scripts/weekly-digest.js +0 -144
  166. package/dist/sinks/format-slack.d.ts +0 -64
  167. package/dist/sinks/format-slack.js +0 -306
  168. package/dist/sinks/slack-sink.d.ts +0 -27
  169. package/dist/sinks/slack-sink.js +0 -78
  170. package/dist/sinks/webhook-sink.d.ts +0 -19
  171. package/dist/sinks/webhook-sink.js +0 -50
  172. package/tasks/.expanded.agentic.yaml +0 -280
  173. package/tasks/.expanded.yaml +0 -565
@@ -1,328 +0,0 @@
1
- #!/usr/bin/env tsx
2
- /**
3
- * Migration script: YAML tasks → Content Lake documents
4
- *
5
- * Reads all existing task definitions from tasks/*.yaml via YamlTaskSource,
6
- * reference solution files from canonical/reference-solutions/, and creates
7
- * corresponding ailf.featureArea, ailf.referenceSolution, and ailf.task
8
- * documents in the Sanity Content Lake.
9
- *
10
- * Idempotent — uses createOrReplace with deterministic document IDs.
11
- * Safe to run multiple times during development/testing.
12
- *
13
- * Usage:
14
- * cd packages/eval
15
- * npx tsx src/scripts/migrate-tasks-to-content-lake.ts
16
- * npx tsx src/scripts/migrate-tasks-to-content-lake.ts --dry-run
17
- *
18
- * Prerequisites:
19
- * - SANITY_API_TOKEN (or AILF_REPORT_SANITY_API_TOKEN) with write access
20
- * - SANITY_PROJECT_ID and SANITY_DATASET configured
21
- *
22
- * @see docs/archive/exec-plans/tasks-as-content/phase-3-migration.md
23
- */
24
- import { config as dotenvConfig } from "dotenv";
25
- import { existsSync, readFileSync } from "fs";
26
- import { dirname, extname, resolve } from "path";
27
- import { fileURLToPath } from "url";
28
- import { isSlugRef, } from "../_vendor/ailf-core/index.js";
29
- import { getSanityClient } from "../sanity/client.js";
30
- import { YamlTaskSource } from "../adapters/task-sources/yaml-task-source.js";
31
- const __dirname = dirname(fileURLToPath(import.meta.url));
32
- const ROOT = resolve(__dirname, "..", "..");
33
- // Load .env from repository root (same as CLI entry point)
34
- const envPath = resolve(ROOT, "..", "..", ".env");
35
- if (existsSync(envPath)) {
36
- dotenvConfig({ override: true, path: envPath });
37
- }
38
- // ---------------------------------------------------------------------------
39
- // Configuration
40
- // ---------------------------------------------------------------------------
41
- const DRY_RUN = process.argv.includes("--dry-run");
42
- /** Human-readable descriptions for feature areas (from YAML filenames). */
43
- const AREA_DESCRIPTIONS = {
44
- frameworks: "Framework Integrations",
45
- functions: "Sanity Functions",
46
- groq: "GROQ Query Language",
47
- "nextjs-live": "Next.js Integration",
48
- "studio-setup": "Studio Setup & Customization",
49
- "visual-editing": "Visual Editing",
50
- };
51
- // ---------------------------------------------------------------------------
52
- // Document ID conventions (deterministic for idempotent createOrReplace)
53
- // ---------------------------------------------------------------------------
54
- function featureAreaDocId(areaId) {
55
- return `ailf.featureArea.${areaId}`;
56
- }
57
- function referenceSolutionDocId(taskId) {
58
- return `ailf.referenceSolution.${taskId}`;
59
- }
60
- function taskDocId(taskId) {
61
- return `ailf.task.${taskId}`;
62
- }
63
- // ---------------------------------------------------------------------------
64
- // Portable Text helpers
65
- // ---------------------------------------------------------------------------
66
- /** Generate a simple random key for Portable Text array items. */
67
- function ptKey() {
68
- return Math.random().toString(36).slice(2, 14);
69
- }
70
- /**
71
- * Convert a source code file to a Portable Text array with a single code block.
72
- * This is the most faithful representation — one code block per file.
73
- */
74
- function sourceToPortableText(code, language) {
75
- return [
76
- {
77
- _key: ptKey(),
78
- _type: "code",
79
- code,
80
- language,
81
- },
82
- ];
83
- }
84
- /** Detect language from file extension. */
85
- function languageFromExt(filePath) {
86
- const ext = extname(filePath).toLowerCase();
87
- if (ext === ".ts" || ext === ".tsx")
88
- return "typescript";
89
- if (ext === ".js" || ext === ".jsx")
90
- return "javascript";
91
- return "typescript"; // default
92
- }
93
- // ---------------------------------------------------------------------------
94
- // Slug → article _id resolution
95
- // ---------------------------------------------------------------------------
96
- /**
97
- * Resolve canonical doc slugs to Sanity article document IDs.
98
- * Returns a map of slug → _id. Unresolved slugs are logged as warnings.
99
- */
100
- async function resolveCanonicalDocIds(client, slugs) {
101
- if (slugs.length === 0)
102
- return new Map();
103
- const query = /* groq */ `
104
- *[_type == "article" && slug.current in $slugs] {
105
- _id,
106
- "slug": slug.current
107
- }
108
- `;
109
- const results = await client.fetch(query, {
110
- slugs,
111
- });
112
- const map = new Map();
113
- for (const r of results) {
114
- map.set(r.slug, r._id);
115
- }
116
- // Log unresolved slugs
117
- for (const slug of slugs) {
118
- if (!map.has(slug)) {
119
- console.warn(` ⚠️ Canonical doc slug "${slug}" could not be resolved`);
120
- }
121
- }
122
- return map;
123
- }
124
- // ---------------------------------------------------------------------------
125
- // Document builders
126
- // ---------------------------------------------------------------------------
127
- function buildFeatureAreaDoc(areaId) {
128
- return {
129
- _id: featureAreaDocId(areaId),
130
- _type: "ailf.featureArea",
131
- areaId: { _type: "slug", current: areaId },
132
- description: AREA_DESCRIPTIONS[areaId] ?? areaId,
133
- };
134
- }
135
- function buildReferenceSolutionDoc(task, code, language) {
136
- return {
137
- _id: referenceSolutionDocId(task.id),
138
- _type: "ailf.referenceSolution",
139
- content: sourceToPortableText(code, language),
140
- language,
141
- title: `${task.title} — Reference Solution`,
142
- };
143
- }
144
- function buildTaskDoc(task, slugToDocId, hasReferenceSolution) {
145
- // Build canonical docs array with resolved references (slug refs only)
146
- const canonicalDocs = (task.context?.docs ?? []).map((ref) => {
147
- const resolvedId = isSlugRef(ref) ? slugToDocId.get(ref.slug) : undefined;
148
- return {
149
- _key: ptKey(),
150
- ...(resolvedId ? { doc: { _ref: resolvedId, _type: "reference" } } : {}),
151
- reason: ref.reason ?? "",
152
- };
153
- });
154
- // Build assertions array
155
- const assertArray = (task.assertions ?? []).map((a) => {
156
- const entry = {
157
- _key: ptKey(),
158
- type: a.type,
159
- };
160
- if (a.type === "llm-rubric" && "template" in a) {
161
- entry.template = a.template;
162
- if ("criteria" in a && Array.isArray(a.criteria)) {
163
- entry.criteria = a.criteria;
164
- }
165
- }
166
- if ("value" in a && a.value !== undefined) {
167
- // Store value as a string (matching the Studio schema text field)
168
- entry.value =
169
- typeof a.value === "string" ? a.value : JSON.stringify(a.value);
170
- }
171
- if ("threshold" in a && a.threshold !== undefined) {
172
- entry.threshold = a.threshold;
173
- }
174
- if (a.weight !== undefined) {
175
- entry.weight = a.weight;
176
- }
177
- return entry;
178
- });
179
- const area = task.area ?? "";
180
- const doc = {
181
- _id: taskDocId(task.id),
182
- _type: "ailf.task",
183
- assert: assertArray,
184
- canonicalDocs,
185
- description: task.title,
186
- docCoverage: task.docCoverage ?? false,
187
- featureArea: {
188
- _ref: featureAreaDocId(area),
189
- _type: "reference",
190
- },
191
- id: { _type: "slug", current: task.id },
192
- taskPrompt: task.prompt?.text ?? "",
193
- };
194
- // Optional reference solution
195
- if (hasReferenceSolution) {
196
- doc.referenceSolution = {
197
- _ref: referenceSolutionDocId(task.id),
198
- _type: "reference",
199
- };
200
- }
201
- // Optional baseline config
202
- if (task.baseline) {
203
- doc.baseline = {
204
- ...(task.baseline.enabled !== undefined
205
- ? { enabled: task.baseline.enabled }
206
- : {}),
207
- ...(task.baseline.rubric ? { rubric: task.baseline.rubric } : {}),
208
- };
209
- }
210
- return doc;
211
- }
212
- // ---------------------------------------------------------------------------
213
- // Main migration
214
- // ---------------------------------------------------------------------------
215
- async function migrate() {
216
- console.log("\n📦 Tasks-as-Content Migration");
217
- console.log("=".repeat(50));
218
- if (DRY_RUN) {
219
- console.log(" Mode: DRY RUN (no writes)");
220
- }
221
- // 1. Load all tasks from YAML
222
- console.log("\n1️⃣ Loading tasks from YAML...");
223
- const taskSource = new YamlTaskSource(ROOT);
224
- const tasks = (await taskSource.loadTasks()).filter((t) => t.mode === "literacy");
225
- console.log(` Loaded ${tasks.length} tasks`);
226
- // 2. Extract unique feature areas
227
- const areas = [...new Set(tasks.map((t) => t.area ?? ""))].sort();
228
- console.log(` Found ${areas.length} feature areas: ${areas.join(", ")}`);
229
- // 3. Collect all canonical doc slugs for batch resolution (slug refs only)
230
- const allSlugs = [
231
- ...new Set(tasks.flatMap((t) => (t.context?.docs ?? []).filter(isSlugRef).map((d) => d.slug))),
232
- ];
233
- console.log(` Found ${allSlugs.length} unique canonical doc slugs`);
234
- // 4. Create Sanity client with write token
235
- // Use the report token (which has write access) but target the content
236
- // dataset (where articles live, so canonicalDocs references resolve).
237
- const writeToken = process.env.AILF_REPORT_SANITY_API_TOKEN ?? process.env.SANITY_API_TOKEN;
238
- if (!writeToken && !DRY_RUN) {
239
- console.error(" ❌ No write token found. Set AILF_REPORT_SANITY_API_TOKEN or SANITY_API_TOKEN.");
240
- process.exit(1);
241
- }
242
- const client = getSanityClient({
243
- ...(writeToken ? { token: writeToken } : {}),
244
- });
245
- console.log(` Sanity: project=${client.config().projectId} dataset=${client.config().dataset}`);
246
- // 5. Resolve canonical doc slugs → article _ids
247
- console.log("\n2️⃣ Resolving canonical doc slugs...");
248
- const slugToDocId = await resolveCanonicalDocIds(client, allSlugs);
249
- const resolved = slugToDocId.size;
250
- const unresolved = allSlugs.length - resolved;
251
- console.log(` Resolved: ${resolved}/${allSlugs.length}${unresolved > 0 ? ` (${unresolved} unresolved)` : ""}`);
252
- // 6. Build all documents
253
- console.log("\n3️⃣ Building documents...");
254
- // Feature areas
255
- const areaDocs = areas.map((a) => buildFeatureAreaDoc(a));
256
- console.log(` ${areaDocs.length} ailf.featureArea documents`);
257
- // Reference solutions
258
- const refSolutionDocs = [];
259
- const tasksWithSolutions = new Set();
260
- for (const task of tasks) {
261
- if (!task.referenceSolution)
262
- continue;
263
- const filePath = resolve(ROOT, "canonical", task.referenceSolution);
264
- try {
265
- const code = readFileSync(filePath, "utf-8");
266
- const language = languageFromExt(filePath);
267
- refSolutionDocs.push(buildReferenceSolutionDoc(task, code, language));
268
- tasksWithSolutions.add(task.id);
269
- }
270
- catch (err) {
271
- console.warn(` ⚠️ Could not read reference solution for ${task.id}: ${filePath}`);
272
- if (err instanceof Error) {
273
- console.warn(` ${err.message}`);
274
- }
275
- }
276
- }
277
- console.log(` ${refSolutionDocs.length} ailf.referenceSolution documents`);
278
- // Tasks
279
- const taskDocs = tasks.map((t) => buildTaskDoc(t, slugToDocId, tasksWithSolutions.has(t.id)));
280
- console.log(` ${taskDocs.length} ailf.task documents`);
281
- // 7. Write to Content Lake
282
- const allDocs = [...areaDocs, ...refSolutionDocs, ...taskDocs];
283
- console.log(`\n4️⃣ Writing ${allDocs.length} documents...`);
284
- if (DRY_RUN) {
285
- console.log(" DRY RUN — skipping writes");
286
- console.log("\n Documents that would be created:");
287
- for (const doc of allDocs) {
288
- console.log(` ${doc._type} → ${doc._id}`);
289
- }
290
- }
291
- else {
292
- // Use a transaction for atomicity
293
- let tx = client.transaction();
294
- for (const doc of allDocs) {
295
- tx = tx.createOrReplace(doc);
296
- }
297
- try {
298
- const result = await tx.commit();
299
- console.log(` ✅ Successfully wrote ${result.documentIds.length} documents`);
300
- }
301
- catch (err) {
302
- console.error(" ❌ Transaction failed:");
303
- if (err instanceof Error) {
304
- console.error(` ${err.message}`);
305
- }
306
- process.exit(1);
307
- }
308
- }
309
- // 8. Summary
310
- console.log("\n" + "=".repeat(50));
311
- console.log("📊 Migration Summary");
312
- console.log(` Feature areas: ${areaDocs.length}`);
313
- console.log(` Reference solutions: ${refSolutionDocs.length}`);
314
- console.log(` Tasks: ${taskDocs.length}`);
315
- console.log(` Total documents: ${allDocs.length}`);
316
- console.log(` Canonical slugs: ${resolved} resolved, ${unresolved} unresolved`);
317
- if (DRY_RUN) {
318
- console.log("\n ℹ️ Run without --dry-run to write to the Content Lake");
319
- }
320
- console.log("");
321
- }
322
- // ---------------------------------------------------------------------------
323
- // Run
324
- // ---------------------------------------------------------------------------
325
- migrate().catch((err) => {
326
- console.error("Migration failed:", err);
327
- process.exit(1);
328
- });
@@ -1,76 +0,0 @@
1
- /**
2
- * pipeline.ts
3
- *
4
- * CLI orchestrator for the modular evaluation pipeline.
5
- * Runs steps in sequence with validation between each.
6
- *
7
- * This is the single entry point for both local and CI evaluation.
8
- * The CI workflow (eval.yml) calls this script, then layers on
9
- * CI-specific post-steps (PR comment posting, artifact upload).
10
- *
11
- * Usage:
12
- * pnpm pipeline # full baseline pipeline
13
- * pnpm pipeline --dry-run # validate only, no execution
14
- * pnpm pipeline --skip-fetch # reuse cached doc contexts
15
- * pnpm pipeline --skip-eval # recalculate from existing results
16
- * pnpm pipeline --mode agentic # run agentic pipeline
17
- * pnpm pipeline --mode observed # run observed pipeline
18
- * pnpm pipeline --source staging # use staging doc source
19
- * pnpm pipeline --debug # run first 2 tests only (fast)
20
- * pnpm pipeline --debug-n 5 # run first 5 tests
21
- * pnpm pipeline --debug-pattern "Blog" # filter by description
22
- * pnpm pipeline --debug-sample 3 # random sample of 3 tests
23
- * pnpm pipeline --no-cache # bypass caching, force re-run
24
- * pnpm pipeline --concurrency 64 # override max parallel API calls
25
- * pnpm pipeline --area groq,frameworks # only evaluate these areas
26
- * pnpm pipeline --task groq-blog-queries # only evaluate this task
27
- * pnpm pipeline --changed-docs groq-introduction,how-queries-work
28
- * # auto-scope to affected tasks
29
- * pnpm pipeline --url https://... # override docs base URL
30
- * pnpm pipeline --sanity-dataset staging # override Sanity dataset
31
- * pnpm pipeline --sanity-project abc123 # override Sanity project ID
32
- * pnpm pipeline --sanity-perspective agent-c7OKTk
33
- * # evaluate a Sanity release
34
- * pnpm pipeline --sanity-document <uuid>
35
- * # evaluate specific document(s)
36
- * pnpm pipeline --sanity-document <uuid> --sanity-documents <uuid>
37
- * # singular and plural aliases work
38
- * pnpm pipeline --header "X-Vercel-Protection-Bypass: <secret>"
39
- * # custom HTTP header (repeatable)
40
- * pnpm pipeline --allowed-origin my-branch.sanity.build
41
- * # sandbox agent to this origin
42
- * pnpm pipeline --before published # run before/after impact evaluation
43
- * pnpm pipeline --before production # before = production source
44
- * pnpm pipeline --before results/baselines/20260310.json # use existing scores
45
- * pnpm pipeline --before latest-baseline # use most recent baseline
46
- * pnpm pipeline --compare # compare scores against latest baseline
47
- * pnpm pipeline --compare --compare-baseline <path> # compare against specific file
48
- * pnpm pipeline --compare --threshold 5 # noise threshold for unchanged (default: 2)
49
- * pnpm pipeline --output /tmp/report.md # write report to specific path
50
- * pnpm pipeline --promptfoo-url <url> # include Promptfoo URL in report
51
- * pnpm pipeline --gap-analysis # run failure mode + impact analysis
52
- * pnpm pipeline --publish # write report to Sanity + fan out to sinks
53
- * pnpm pipeline --publish --publish-tag "daily-2026-03-11" # tag the report
54
- * pnpm pipeline --publish --report-dataset ailf-reports # report store dataset
55
- * pnpm pipeline --publish --report-project abc123 # report store project
56
- *
57
- * Override precedence (highest wins):
58
- * CLI flag (--url, --sanity-dataset, --sanity-project, --allowed-origin)
59
- * → Environment variable (DOC_BASE_URL, SANITY_DATASET, SANITY_PROJECT_ID, DOC_ALLOWED_ORIGIN)
60
- * → config/sources.yaml default value
61
- *
62
- * --header flags are additive and do not override env vars — they are
63
- * always merged with any headers defined in DOC_HEADERS env var.
64
- *
65
- * Environment variable fallbacks (for CI):
66
- * DEBUG_EVAL=1 → --debug
67
- * DEBUG_EVAL_N=2 → --debug-n 2
68
- * DEBUG_EVAL_PATTERN → --debug-pattern
69
- * DEBUG_EVAL_SAMPLE → --debug-sample
70
- * EVAL_FILTER_AREAS → --area
71
- * EVAL_FILTER_TASKS → --task
72
- * EVAL_CHANGED_DOCS → --changed-docs
73
- * AILF_REPORT_DATASET → --report-dataset (report store, not eval)
74
- * AILF_REPORT_PROJECT_ID → --report-project (report store, not eval)
75
- */
76
- export {};