@sanity/ailf 2.0.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +50 -1
- package/dist/_vendor/ailf-core/examples/index.js +66 -1
- package/dist/agent-harness/assertions-runtime.d.ts +49 -0
- package/dist/agent-harness/assertions-runtime.js +138 -0
- package/dist/agent-harness/provider.d.ts +58 -0
- package/dist/agent-harness/provider.js +104 -0
- package/dist/cli.js +0 -0
- package/dist/commands/init.js +3 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +7 -0
- package/dist/orchestration/steps/generate-configs-step.js +35 -2
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +39 -25
- package/dist/pipeline/compiler/compiler-to-yaml.js +78 -7
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +28 -85
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +22 -15
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +8 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +42 -12
- package/package.json +25 -24
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +0 -10
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +0 -185
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +0 -6
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +0 -42
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/adapters/task-sources/yaml-task-source.d.ts +0 -18
- package/dist/adapters/task-sources/yaml-task-source.js +0 -139
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
- package/dist/commands/update-quality-scores.d.ts +0 -5
- package/dist/commands/update-quality-scores.js +0 -20
- package/dist/lib/agent-behavior-report.d.ts +0 -8
- package/dist/lib/agent-behavior-report.js +0 -185
- package/dist/lib/baseline.d.ts +0 -19
- package/dist/lib/baseline.js +0 -153
- package/dist/lib/calculate-scores.d.ts +0 -23
- package/dist/lib/calculate-scores.js +0 -42
- package/dist/lib/compare.d.ts +0 -18
- package/dist/lib/compare.js +0 -170
- package/dist/lib/coverage-audit.d.ts +0 -4
- package/dist/lib/coverage-audit.js +0 -42
- package/dist/lib/discovery-report.d.ts +0 -13
- package/dist/lib/discovery-report.js +0 -57
- package/dist/lib/fetch-docs.d.ts +0 -30
- package/dist/lib/fetch-docs.js +0 -171
- package/dist/lib/generate-configs.d.ts +0 -25
- package/dist/lib/generate-configs.js +0 -42
- package/dist/lib/grader-api.d.ts +0 -21
- package/dist/lib/grader-api.js +0 -34
- package/dist/lib/grader-compare.d.ts +0 -19
- package/dist/lib/grader-compare.js +0 -91
- package/dist/lib/grader-consistency.d.ts +0 -27
- package/dist/lib/grader-consistency.js +0 -79
- package/dist/lib/grader-sensitivity.d.ts +0 -19
- package/dist/lib/grader-sensitivity.js +0 -75
- package/dist/lib/grader-validate.d.ts +0 -19
- package/dist/lib/grader-validate.js +0 -78
- package/dist/lib/measure-retrieval.d.ts +0 -14
- package/dist/lib/measure-retrieval.js +0 -71
- package/dist/lib/pr-comment.d.ts +0 -16
- package/dist/lib/pr-comment.js +0 -28
- package/dist/lib/readiness-report.d.ts +0 -13
- package/dist/lib/readiness-report.js +0 -108
- package/dist/lib/webhook-server.d.ts +0 -11
- package/dist/lib/webhook-server.js +0 -24
- package/dist/lib/weekly-digest.d.ts +0 -24
- package/dist/lib/weekly-digest.js +0 -148
- package/dist/orchestration/env-bridge.d.ts +0 -21
- package/dist/orchestration/env-bridge.js +0 -66
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
- package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +0 -339
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +0 -70
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +0 -50
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +0 -334
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -69
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -307
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +0 -65
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +0 -368
- package/dist/pipeline/compiler/task-bridge.d.ts +0 -41
- package/dist/pipeline/compiler/task-bridge.js +0 -92
- package/dist/pipeline/expand-tasks.d.ts +0 -232
- package/dist/pipeline/expand-tasks.js +0 -467
- package/dist/pipeline/generate-configs.d.ts +0 -92
- package/dist/pipeline/generate-configs.js +0 -445
- package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
- package/dist/pipeline/steps/calculate-scores-step.js +0 -89
- package/dist/pipeline/steps/compare-step.d.ts +0 -18
- package/dist/pipeline/steps/compare-step.js +0 -90
- package/dist/pipeline/steps/eval-step.d.ts +0 -53
- package/dist/pipeline/steps/eval-step.js +0 -347
- package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
- package/dist/pipeline/steps/fetch-docs-step.js +0 -84
- package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
- package/dist/pipeline/steps/generate-configs-step.js +0 -98
- package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
- package/dist/pipeline/steps/grader-consistency-step.js +0 -74
- package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
- package/dist/pipeline/steps/publish-report-step.js +0 -243
- package/dist/pipeline/steps/report-step.d.ts +0 -13
- package/dist/pipeline/steps/report-step.js +0 -56
- package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
- package/dist/pipeline/steps/update-scores-step.js +0 -42
- package/dist/scripts/agent-behavior-report.d.ts +0 -19
- package/dist/scripts/agent-behavior-report.js +0 -315
- package/dist/scripts/baseline.d.ts +0 -43
- package/dist/scripts/baseline.js +0 -267
- package/dist/scripts/calculate-scores.d.ts +0 -166
- package/dist/scripts/calculate-scores.js +0 -1296
- package/dist/scripts/compare.d.ts +0 -22
- package/dist/scripts/compare.js +0 -334
- package/dist/scripts/coverage-audit.d.ts +0 -44
- package/dist/scripts/coverage-audit.js +0 -209
- package/dist/scripts/debug-eval.d.ts +0 -19
- package/dist/scripts/debug-eval.js +0 -73
- package/dist/scripts/discovery-report.d.ts +0 -58
- package/dist/scripts/discovery-report.js +0 -250
- package/dist/scripts/fetch-docs.d.ts +0 -35
- package/dist/scripts/fetch-docs.js +0 -472
- package/dist/scripts/generate-configs.d.ts +0 -66
- package/dist/scripts/generate-configs.js +0 -459
- package/dist/scripts/grader-api.d.ts +0 -27
- package/dist/scripts/grader-api.js +0 -206
- package/dist/scripts/grader-compare.d.ts +0 -22
- package/dist/scripts/grader-compare.js +0 -368
- package/dist/scripts/grader-consistency.d.ts +0 -20
- package/dist/scripts/grader-consistency.js +0 -313
- package/dist/scripts/grader-sensitivity.d.ts +0 -22
- package/dist/scripts/grader-sensitivity.js +0 -354
- package/dist/scripts/grader-validate.d.ts +0 -19
- package/dist/scripts/grader-validate.js +0 -267
- package/dist/scripts/measure-retrieval.d.ts +0 -10
- package/dist/scripts/measure-retrieval.js +0 -145
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +0 -24
- package/dist/scripts/migrate-tasks-to-content-lake.js +0 -328
- package/dist/scripts/pipeline.d.ts +0 -76
- package/dist/scripts/pipeline.js +0 -1031
- package/dist/scripts/pr-comment.d.ts +0 -10
- package/dist/scripts/pr-comment.js +0 -510
- package/dist/scripts/readiness-report.d.ts +0 -88
- package/dist/scripts/readiness-report.js +0 -342
- package/dist/scripts/update-quality-scores.d.ts +0 -15
- package/dist/scripts/update-quality-scores.js +0 -184
- package/dist/scripts/validate-task-sources.d.ts +0 -21
- package/dist/scripts/validate-task-sources.js +0 -210
- package/dist/scripts/validate.d.ts +0 -13
- package/dist/scripts/validate.js +0 -79
- package/dist/scripts/webhook-server.d.ts +0 -26
- package/dist/scripts/webhook-server.js +0 -147
- package/dist/scripts/weekly-digest.d.ts +0 -24
- package/dist/scripts/weekly-digest.js +0 -144
- package/dist/sinks/format-slack.d.ts +0 -64
- package/dist/sinks/format-slack.js +0 -306
- package/dist/sinks/slack-sink.d.ts +0 -27
- package/dist/sinks/slack-sink.js +0 -78
- package/dist/sinks/webhook-sink.d.ts +0 -19
- package/dist/sinks/webhook-sink.js +0 -50
- package/tasks/.expanded.agentic.yaml +0 -280
- package/tasks/.expanded.yaml +0 -565
|
@@ -1,328 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env tsx
|
|
2
|
-
/**
|
|
3
|
-
* Migration script: YAML tasks → Content Lake documents
|
|
4
|
-
*
|
|
5
|
-
* Reads all existing task definitions from tasks/*.yaml via YamlTaskSource,
|
|
6
|
-
* reference solution files from canonical/reference-solutions/, and creates
|
|
7
|
-
* corresponding ailf.featureArea, ailf.referenceSolution, and ailf.task
|
|
8
|
-
* documents in the Sanity Content Lake. Only tasks whose mode is "literacy" are migrated.
|
|
9
|
-
*
|
|
10
|
-
* Idempotent at the document level — uses createOrReplace with deterministic document IDs (array item _key values are regenerated randomly on each run).
|
|
11
|
-
* Safe to run multiple times during development/testing.
|
|
12
|
-
*
|
|
13
|
-
* Usage:
|
|
14
|
-
* cd packages/eval
|
|
15
|
-
* npx tsx src/scripts/migrate-tasks-to-content-lake.ts
|
|
16
|
-
* npx tsx src/scripts/migrate-tasks-to-content-lake.ts --dry-run
|
|
17
|
-
*
|
|
18
|
-
* Prerequisites:
|
|
19
|
-
* - SANITY_API_TOKEN (or AILF_REPORT_SANITY_API_TOKEN) with write access
|
|
20
|
-
* - SANITY_PROJECT_ID and SANITY_DATASET configured
|
|
21
|
-
*
|
|
22
|
-
* @see docs/archive/exec-plans/tasks-as-content/phase-3-migration.md
|
|
23
|
-
*/
|
|
24
|
-
import { config as dotenvConfig } from "dotenv";
|
|
25
|
-
import { existsSync, readFileSync } from "fs";
|
|
26
|
-
import { dirname, extname, resolve } from "path";
|
|
27
|
-
import { fileURLToPath } from "url";
|
|
28
|
-
import { isSlugRef, } from "../_vendor/ailf-core/index.js";
|
|
29
|
-
import { getSanityClient } from "../sanity/client.js";
|
|
30
|
-
import { YamlTaskSource } from "../adapters/task-sources/yaml-task-source.js";
|
|
31
|
-
// Directory that contains this script, and ROOT two directory levels above it.
const __dirname = dirname(fileURLToPath(import.meta.url));
const ROOT = resolve(__dirname, "..", "..");

// Environment bootstrap: when a .env file exists two directories above ROOT,
// load it. `override: true` lets values from that file replace variables that
// are already present in process.env.
const envPath = resolve(ROOT, "..", "..", ".env");
if (existsSync(envPath)) {
    dotenvConfig({ path: envPath, override: true });
}

// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

// True when the script was started with --dry-run; migrate() then builds and
// lists the documents but performs no writes.
const DRY_RUN = process.argv.includes("--dry-run");
|
|
42
|
-
/**
 * Human-readable descriptions for feature areas (from YAML filenames), keyed
 * by area id.
 *
 * The table is created with a null prototype so that looking up an arbitrary
 * area id (for example "constructor" or "toString") can never return a member
 * inherited from Object.prototype; unknown ids simply yield `undefined`.
 * It is frozen because it is shared, read-only configuration.
 */
const AREA_DESCRIPTIONS = Object.freeze(Object.assign(Object.create(null), {
    frameworks: "Framework Integrations",
    functions: "Sanity Functions",
    groq: "GROQ Query Language",
    "nextjs-live": "Next.js Integration",
    "studio-setup": "Studio Setup & Customization",
    "visual-editing": "Visual Editing",
}));
|
|
51
|
-
// ---------------------------------------------------------------------------
|
|
52
|
-
// Document ID conventions (deterministic for idempotent createOrReplace)
|
|
53
|
-
// ---------------------------------------------------------------------------
|
|
54
|
-
/**
 * Deterministic document id for a feature area.
 *
 * @param {string} areaId - Feature area identifier.
 * @returns {string} `ailf.featureArea.` followed by the area id.
 */
function featureAreaDocId(areaId) {
    const prefix = "ailf.featureArea.";
    return prefix.concat(areaId);
}
|
|
57
|
-
/**
 * Deterministic document id for a task's reference solution.
 *
 * @param {string} taskId - Task identifier.
 * @returns {string} `ailf.referenceSolution.` followed by the task id.
 */
function referenceSolutionDocId(taskId) {
    const prefix = "ailf.referenceSolution.";
    return prefix.concat(taskId);
}
|
|
60
|
-
/**
 * Deterministic document id for a task.
 *
 * @param {string} taskId - Task identifier.
 * @returns {string} `ailf.task.` followed by the task id.
 */
function taskDocId(taskId) {
    const prefix = "ailf.task.";
    return prefix.concat(taskId);
}
|
|
63
|
-
// ---------------------------------------------------------------------------
|
|
64
|
-
// Portable Text helpers
|
|
65
|
-
// ---------------------------------------------------------------------------
|
|
66
|
-
/**
 * Generate a random 12-character base-36 key for Portable Text array items.
 *
 * `Math.random().toString(36)` omits trailing zero digits, so the fractional
 * part can be shorter than 12 characters (0.5 renders as "0.i", and 0 renders
 * as "0", which leaves nothing after the slice). Right-padding with "0"
 * restores exactly the digits that were omitted, so the key always has 12
 * characters and is never empty.
 *
 * The key is random, not derived from content, and Math.random() is not a
 * cryptographically secure source.
 *
 * @returns {string} Twelve characters from the set [0-9a-z].
 */
function ptKey() {
    const KEY_LENGTH = 12;
    return Math.random()
        .toString(36)
        .slice(2, 2 + KEY_LENGTH)
        .padEnd(KEY_LENGTH, "0");
}
|
|
70
|
-
/**
 * Wrap the text of a source file in a Portable Text array.
 *
 * The array always holds exactly one item: a block of type "code" carrying
 * the given code and language plus a freshly generated `_key`.
 *
 * @param {string} code - Full text of the source file.
 * @param {string} language - Language label stored on the code block.
 * @returns {Array<{_key: string, _type: string, code: string, language: string}>}
 */
function sourceToPortableText(code, language) {
    const codeBlock = { _key: ptKey(), _type: "code", code, language };
    return [codeBlock];
}
|
|
84
|
-
/**
 * Detect the code language from a file path's extension (case-insensitive).
 *
 * `.js`, `.jsx`, `.mjs` and `.cjs` map to "javascript". Every other
 * extension, including `.ts`/`.tsx` and paths without an extension, maps to
 * "typescript", which is the default.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {"javascript" | "typescript"} The detected language label.
 */
function languageFromExt(filePath) {
    switch (extname(filePath).toLowerCase()) {
        case ".js":
        case ".jsx":
        case ".mjs": // ES-module JavaScript
        case ".cjs": // CommonJS JavaScript
            return "javascript";
        default:
            return "typescript";
    }
}
|
|
93
|
-
// ---------------------------------------------------------------------------
|
|
94
|
-
// Slug → article _id resolution
|
|
95
|
-
// ---------------------------------------------------------------------------
|
|
96
|
-
/**
 * Resolve canonical doc slugs to Sanity article document IDs.
 *
 * Issues a single GROQ query for all slugs and returns a Map of slug → _id.
 * Slugs with no matching article are logged as warnings and left out of the
 * map. No query is made when `slugs` is empty.
 *
 * When several results share a slug, the last one wins, except that a
 * `drafts.`-prefixed _id never replaces a non-draft _id: references built from
 * this map should point at the published document whenever one exists. A draft
 * _id is still returned when it is the only match for its slug.
 *
 * @param client - Object exposing `fetch(query, params)` that resolves to an
 *   array of `{ _id, slug }` results.
 * @param {string[]} slugs - Article slugs to resolve.
 * @returns {Promise<Map<string, string>>} Map of slug → document _id.
 */
async function resolveCanonicalDocIds(client, slugs) {
    if (slugs.length === 0) {
        return new Map();
    }
    const query = /* groq */ `
*[_type == "article" && slug.current in $slugs] {
_id,
"slug": slug.current
}
`;
    const results = await client.fetch(query, { slugs });
    const isDraftId = (id) => typeof id === "string" && id.startsWith("drafts.");
    const map = new Map();
    for (const { _id, slug } of results) {
        const known = map.get(slug);
        // Never let a draft id displace an already-recorded published id.
        if (known !== undefined && !isDraftId(known) && isDraftId(_id)) {
            continue;
        }
        map.set(slug, _id);
    }
    // Log unresolved slugs
    for (const slug of slugs) {
        if (!map.has(slug)) {
            console.warn(` ⚠️ Canonical doc slug "${slug}" could not be resolved`);
        }
    }
    return map;
}
|
|
124
|
-
// ---------------------------------------------------------------------------
|
|
125
|
-
// Document builders
|
|
126
|
-
// ---------------------------------------------------------------------------
|
|
127
|
-
/**
 * Build the `ailf.featureArea` document for one feature area.
 *
 * The description comes from AREA_DESCRIPTIONS; when that lookup yields
 * null/undefined, the area id itself is used as the description.
 *
 * @param {string} areaId - Feature area identifier.
 * @returns {{_id: string, _type: string, areaId: object, description: string}}
 */
function buildFeatureAreaDoc(areaId) {
    const _id = featureAreaDocId(areaId);
    const slug = { _type: "slug", current: areaId };
    const description = AREA_DESCRIPTIONS[areaId] ?? areaId;
    return { _id, _type: "ailf.featureArea", areaId: slug, description };
}
|
|
135
|
-
/**
 * Build the `ailf.referenceSolution` document for a task.
 *
 * @param task - Task definition; its `id` and `title` are read.
 * @param {string} code - Text of the reference solution file.
 * @param {string} language - Language label for the solution.
 * @returns {object} Document whose `content` is a single-code-block Portable
 *   Text array and whose title is the task title plus " — Reference Solution".
 */
function buildReferenceSolutionDoc(task, code, language) {
    const solutionDoc = {
        _id: referenceSolutionDocId(task.id),
        _type: "ailf.referenceSolution",
    };
    solutionDoc.content = sourceToPortableText(code, language);
    solutionDoc.language = language;
    solutionDoc.title = `${task.title} — Reference Solution`;
    return solutionDoc;
}
|
|
144
|
-
/**
 * Build the `ailf.task` document for one task definition.
 *
 * @param task - Task definition. Reads `id`, `title`, `area`, `docCoverage`,
 *   `prompt.text`, `context.docs`, `assertions` and `baseline`.
 * @param {Map<string, string>} slugToDocId - Resolved slug → article _id map.
 * @param {boolean} hasReferenceSolution - When true, a reference to the
 *   task's reference solution document is added.
 * @returns {object} The task document, ready for createOrReplace.
 */
function buildTaskDoc(task, slugToDocId, hasReferenceSolution) {
    // Canonical docs: one item per context doc ref. Only slug refs are looked
    // up; an item whose ref did not resolve carries no `doc` reference.
    const toCanonicalDocItem = (ref) => {
        const targetId = isSlugRef(ref) ? slugToDocId.get(ref.slug) : undefined;
        const item = { _key: ptKey() };
        if (targetId) {
            item.doc = { _ref: targetId, _type: "reference" };
        }
        item.reason = ref.reason ?? "";
        return item;
    };

    // Assertions: copy the type, then only the optional fields that are set.
    const toAssertionItem = (assertion) => {
        const item = { _key: ptKey(), type: assertion.type };
        const isTemplatedRubric = assertion.type === "llm-rubric" && "template" in assertion;
        if (isTemplatedRubric) {
            item.template = assertion.template;
            const hasCriteriaList = "criteria" in assertion && Array.isArray(assertion.criteria);
            if (hasCriteriaList) {
                item.criteria = assertion.criteria;
            }
        }
        if ("value" in assertion && assertion.value !== undefined) {
            // The value is always stored as a string; non-strings are JSON-encoded.
            if (typeof assertion.value === "string") {
                item.value = assertion.value;
            }
            else {
                item.value = JSON.stringify(assertion.value);
            }
        }
        if ("threshold" in assertion && assertion.threshold !== undefined) {
            item.threshold = assertion.threshold;
        }
        if (assertion.weight !== undefined) {
            item.weight = assertion.weight;
        }
        return item;
    };

    const canonicalDocs = (task.context?.docs ?? []).map((ref) => toCanonicalDocItem(ref));
    const assertArray = (task.assertions ?? []).map((assertion) => toAssertionItem(assertion));

    const doc = {
        _id: taskDocId(task.id),
        _type: "ailf.task",
        assert: assertArray,
        canonicalDocs,
        description: task.title,
        docCoverage: task.docCoverage ?? false,
        featureArea: { _ref: featureAreaDocId(task.area ?? ""), _type: "reference" },
        id: { _type: "slug", current: task.id },
        taskPrompt: task.prompt?.text ?? "",
    };

    // Optional reference solution
    if (hasReferenceSolution) {
        doc.referenceSolution = { _ref: referenceSolutionDocId(task.id), _type: "reference" };
    }

    // Optional baseline config: `enabled` is copied when defined, `rubric` when truthy.
    if (task.baseline) {
        const baseline = {};
        if (task.baseline.enabled !== undefined) {
            baseline.enabled = task.baseline.enabled;
        }
        if (task.baseline.rubric) {
            baseline.rubric = task.baseline.rubric;
        }
        doc.baseline = baseline;
    }
    return doc;
}
|
|
212
|
-
// ---------------------------------------------------------------------------
|
|
213
|
-
// Main migration
|
|
214
|
-
// ---------------------------------------------------------------------------
|
|
215
|
-
/**
 * Run the migration end to end.
 *
 * Loads tasks through YamlTaskSource and keeps those whose mode is
 * "literacy", resolves their canonical doc slugs, builds feature-area,
 * reference-solution and task documents, and writes them all in a single
 * createOrReplace transaction. With --dry-run the documents are only listed.
 * Progress and a final summary are printed to the console.
 *
 * Calls process.exit(1) when no write token is configured (outside dry-run)
 * or when the transaction commit fails.
 *
 * @returns {Promise<void>}
 */
async function migrate() {
    const rule = "=".repeat(50);
    console.log("\n📦 Tasks-as-Content Migration");
    console.log(rule);
    if (DRY_RUN) {
        console.log(" Mode: DRY RUN (no writes)");
    }

    // 1. Load all tasks from YAML
    console.log("\n1️⃣ Loading tasks from YAML...");
    const source = new YamlTaskSource(ROOT);
    const loadedTasks = await source.loadTasks();
    const tasks = loadedTasks.filter((t) => t.mode === "literacy");
    console.log(` Loaded ${tasks.length} tasks`);

    // 2. Extract unique feature areas
    const areaSet = new Set(tasks.map((t) => t.area ?? ""));
    const areas = Array.from(areaSet).sort();
    console.log(` Found ${areas.length} feature areas: ${areas.join(", ")}`);

    // 3. Collect all canonical doc slugs for batch resolution (slug refs only)
    const slugsPerTask = tasks.flatMap((t) => (t.context?.docs ?? []).filter(isSlugRef).map((d) => d.slug));
    const allSlugs = Array.from(new Set(slugsPerTask));
    console.log(` Found ${allSlugs.length} unique canonical doc slugs`);

    // 4. Create Sanity client with write token.
    // AILF_REPORT_SANITY_API_TOKEN takes precedence over SANITY_API_TOKEN.
    const writeToken = process.env.AILF_REPORT_SANITY_API_TOKEN ?? process.env.SANITY_API_TOKEN;
    const tokenRequired = !DRY_RUN;
    if (tokenRequired && !writeToken) {
        console.error(" ❌ No write token found. Set AILF_REPORT_SANITY_API_TOKEN or SANITY_API_TOKEN.");
        process.exit(1);
    }
    const client = getSanityClient(writeToken ? { token: writeToken } : {});
    console.log(` Sanity: project=${client.config().projectId} dataset=${client.config().dataset}`);

    // 5. Resolve canonical doc slugs → article _ids
    console.log("\n2️⃣ Resolving canonical doc slugs...");
    const slugToDocId = await resolveCanonicalDocIds(client, allSlugs);
    const resolved = slugToDocId.size;
    const unresolved = allSlugs.length - resolved;
    const unresolvedNote = unresolved > 0 ? ` (${unresolved} unresolved)` : "";
    console.log(` Resolved: ${resolved}/${allSlugs.length}${unresolvedNote}`);

    // 6. Build all documents
    console.log("\n3️⃣ Building documents...");

    // Feature areas
    const areaDocs = areas.map((a) => buildFeatureAreaDoc(a));
    console.log(` ${areaDocs.length} ailf.featureArea documents`);

    // Reference solutions: a file that cannot be read is reported and skipped,
    // and the task is then built without a referenceSolution reference.
    const refSolutionDocs = [];
    const tasksWithSolutions = new Set();
    for (const task of tasks) {
        if (task.referenceSolution) {
            const filePath = resolve(ROOT, "canonical", task.referenceSolution);
            try {
                const code = readFileSync(filePath, "utf-8");
                const language = languageFromExt(filePath);
                refSolutionDocs.push(buildReferenceSolutionDoc(task, code, language));
                tasksWithSolutions.add(task.id);
            }
            catch (readError) {
                console.warn(` ⚠️ Could not read reference solution for ${task.id}: ${filePath}`);
                if (readError instanceof Error) {
                    console.warn(` ${readError.message}`);
                }
            }
        }
    }
    console.log(` ${refSolutionDocs.length} ailf.referenceSolution documents`);

    // Tasks
    const taskDocs = tasks.map((t) => buildTaskDoc(t, slugToDocId, tasksWithSolutions.has(t.id)));
    console.log(` ${taskDocs.length} ailf.task documents`);

    // 7. Write to Content Lake
    const allDocs = [...areaDocs, ...refSolutionDocs, ...taskDocs];
    console.log(`\n4️⃣ Writing ${allDocs.length} documents...`);
    if (!DRY_RUN) {
        // All documents go into one transaction so they are committed together.
        const tx = allDocs.reduce((pending, doc) => pending.createOrReplace(doc), client.transaction());
        try {
            const result = await tx.commit();
            console.log(` ✅ Successfully wrote ${result.documentIds.length} documents`);
        }
        catch (commitError) {
            console.error(" ❌ Transaction failed:");
            if (commitError instanceof Error) {
                console.error(` ${commitError.message}`);
            }
            process.exit(1);
        }
    }
    else {
        console.log(" DRY RUN — skipping writes");
        console.log("\n Documents that would be created:");
        for (const doc of allDocs) {
            console.log(` ${doc._type} → ${doc._id}`);
        }
    }

    // 8. Summary
    const summaryLines = [
        `\n${rule}`,
        "📊 Migration Summary",
        ` Feature areas: ${areaDocs.length}`,
        ` Reference solutions: ${refSolutionDocs.length}`,
        ` Tasks: ${taskDocs.length}`,
        ` Total documents: ${allDocs.length}`,
        ` Canonical slugs: ${resolved} resolved, ${unresolved} unresolved`,
    ];
    for (const line of summaryLines) {
        console.log(line);
    }
    if (DRY_RUN) {
        console.log("\n ℹ️ Run without --dry-run to write to the Content Lake");
    }
    console.log("");
}
|
|
322
|
-
// ---------------------------------------------------------------------------
|
|
323
|
-
// Run
|
|
324
|
-
// ---------------------------------------------------------------------------
|
|
325
|
-
// Entry point: start the migration; any rejection that escapes migrate() is
// reported on stderr and turned into exit code 1.
migrate().catch((fatalError) => {
    console.error("Migration failed:", fatalError);
    process.exit(1);
});
|
|
@@ -1,76 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* pipeline.ts
|
|
3
|
-
*
|
|
4
|
-
* CLI orchestrator for the modular evaluation pipeline.
|
|
5
|
-
* Runs steps in sequence with validation between each.
|
|
6
|
-
*
|
|
7
|
-
* This is the single entry point for both local and CI evaluation.
|
|
8
|
-
* The CI workflow (eval.yml) calls this script, then layers on
|
|
9
|
-
* CI-specific post-steps (PR comment posting, artifact upload).
|
|
10
|
-
*
|
|
11
|
-
* Usage:
|
|
12
|
-
* pnpm pipeline # full baseline pipeline
|
|
13
|
-
* pnpm pipeline --dry-run # validate only, no execution
|
|
14
|
-
* pnpm pipeline --skip-fetch # reuse cached doc contexts
|
|
15
|
-
* pnpm pipeline --skip-eval # recalculate from existing results
|
|
16
|
-
* pnpm pipeline --mode agentic # run agentic pipeline
|
|
17
|
-
* pnpm pipeline --mode observed # run observed pipeline
|
|
18
|
-
* pnpm pipeline --source staging # use staging doc source
|
|
19
|
-
* pnpm pipeline --debug # run first 2 tests only (fast)
|
|
20
|
-
* pnpm pipeline --debug-n 5 # run first 5 tests
|
|
21
|
-
* pnpm pipeline --debug-pattern "Blog" # filter by description
|
|
22
|
-
* pnpm pipeline --debug-sample 3 # random sample of 3 tests
|
|
23
|
-
* pnpm pipeline --no-cache # bypass caching, force re-run
|
|
24
|
-
* pnpm pipeline --concurrency 64 # override max parallel API calls
|
|
25
|
-
* pnpm pipeline --area groq,frameworks # only evaluate these areas
|
|
26
|
-
* pnpm pipeline --task groq-blog-queries # only evaluate this task
|
|
27
|
-
* pnpm pipeline --changed-docs groq-introduction,how-queries-work
|
|
28
|
-
* # auto-scope to affected tasks
|
|
29
|
-
* pnpm pipeline --url https://... # override docs base URL
|
|
30
|
-
* pnpm pipeline --sanity-dataset staging # override Sanity dataset
|
|
31
|
-
* pnpm pipeline --sanity-project abc123 # override Sanity project ID
|
|
32
|
-
* pnpm pipeline --sanity-perspective agent-c7OKTk
|
|
33
|
-
* # evaluate a Sanity release
|
|
34
|
-
* pnpm pipeline --sanity-document <uuid>
|
|
35
|
-
* # evaluate specific document(s)
|
|
36
|
-
* pnpm pipeline --sanity-document <uuid> --sanity-documents <uuid>
|
|
37
|
-
* # singular and plural aliases work
|
|
38
|
-
* pnpm pipeline --header "X-Vercel-Protection-Bypass: <secret>"
|
|
39
|
-
* # custom HTTP header (repeatable)
|
|
40
|
-
* pnpm pipeline --allowed-origin my-branch.sanity.build
|
|
41
|
-
* # sandbox agent to this origin
|
|
42
|
-
* pnpm pipeline --before published # run before/after impact evaluation
|
|
43
|
-
* pnpm pipeline --before production # before = production source
|
|
44
|
-
* pnpm pipeline --before results/baselines/20260310.json # use existing scores
|
|
45
|
-
* pnpm pipeline --before latest-baseline # use most recent baseline
|
|
46
|
-
* pnpm pipeline --compare # compare scores against latest baseline
|
|
47
|
-
* pnpm pipeline --compare --compare-baseline <path> # compare against specific file
|
|
48
|
-
* pnpm pipeline --compare --threshold 5 # noise threshold for unchanged (default: 2)
|
|
49
|
-
* pnpm pipeline --output /tmp/report.md # write report to specific path
|
|
50
|
-
* pnpm pipeline --promptfoo-url <url> # include Promptfoo URL in report
|
|
51
|
-
* pnpm pipeline --gap-analysis # run failure mode + impact analysis
|
|
52
|
-
* pnpm pipeline --publish # write report to Sanity + fan out to sinks
|
|
53
|
-
* pnpm pipeline --publish --publish-tag "daily-2026-03-11" # tag the report
|
|
54
|
-
* pnpm pipeline --publish --report-dataset ailf-reports # report store dataset
|
|
55
|
-
* pnpm pipeline --publish --report-project abc123 # report store project
|
|
56
|
-
*
|
|
57
|
-
* Override precedence (highest wins):
|
|
58
|
-
* CLI flag (--url, --sanity-dataset, --sanity-project, --allowed-origin)
|
|
59
|
-
* → Environment variable (DOC_BASE_URL, SANITY_DATASET, SANITY_PROJECT_ID, DOC_ALLOWED_ORIGIN)
|
|
60
|
-
* → config/sources.yaml default value
|
|
61
|
-
*
|
|
62
|
-
* --header flags are additive and do not override env vars — they are
|
|
63
|
-
* always merged with any headers defined in DOC_HEADERS env var.
|
|
64
|
-
*
|
|
65
|
-
* Environment variable fallbacks (for CI):
|
|
66
|
-
* DEBUG_EVAL=1 → --debug
|
|
67
|
-
* DEBUG_EVAL_N=2 → --debug-n 2
|
|
68
|
-
* DEBUG_EVAL_PATTERN → --debug-pattern
|
|
69
|
-
* DEBUG_EVAL_SAMPLE → --debug-sample
|
|
70
|
-
* EVAL_FILTER_AREAS → --area
|
|
71
|
-
* EVAL_FILTER_TASKS → --task
|
|
72
|
-
* EVAL_CHANGED_DOCS → --changed-docs
|
|
73
|
-
* AILF_REPORT_DATASET → --report-dataset (report store, not eval)
|
|
74
|
-
* AILF_REPORT_PROJECT_ID → --report-project (report store, not eval)
|
|
75
|
-
*/
|
|
76
|
-
export {};
|