@sanity/ailf 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/examples/index.js +1 -1
- package/dist/_vendor/ailf-core/ports/context.d.ts +6 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -53
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -2
- package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
- package/dist/_vendor/ailf-tasks/cli.js +61 -0
- package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
- package/dist/_vendor/ailf-tasks/index.js +16 -0
- package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
- package/dist/_vendor/ailf-tasks/parser.js +73 -0
- package/dist/_vendor/ailf-tasks/schemas.d.ts +186 -0
- package/dist/_vendor/ailf-tasks/schemas.js +176 -0
- package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
- package/dist/_vendor/ailf-tasks/validation.js +162 -0
- package/dist/adapters/api-client/api-client.d.ts +75 -0
- package/dist/adapters/api-client/api-client.js +201 -0
- package/dist/adapters/api-client/build-request.d.ts +75 -0
- package/dist/adapters/api-client/build-request.js +176 -0
- package/dist/adapters/api-client/errors.d.ts +43 -0
- package/dist/adapters/api-client/errors.js +68 -0
- package/dist/adapters/api-client/format-error.d.ts +22 -0
- package/dist/adapters/api-client/format-error.js +48 -0
- package/dist/adapters/api-client/index.d.ts +13 -0
- package/dist/adapters/api-client/index.js +12 -0
- package/dist/adapters/api-client/progress.d.ts +26 -0
- package/dist/adapters/api-client/progress.js +69 -0
- package/dist/adapters/api-client/remediation.d.ts +19 -0
- package/dist/adapters/api-client/remediation.js +76 -0
- package/dist/adapters/api-client/types.d.ts +98 -0
- package/dist/adapters/api-client/types.js +14 -0
- package/dist/adapters/config-sources/file-config-adapter.js +2 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +16 -181
- package/dist/adapters/task-sources/repo-schemas.js +27 -184
- package/dist/adapters/task-sources/repo-validation.d.ts +5 -46
- package/dist/adapters/task-sources/repo-validation.js +5 -161
- package/dist/commands/calculate-scores.js +2 -0
- package/dist/commands/explain-handler.js +6 -0
- package/dist/commands/fetch-docs.js +2 -0
- package/dist/commands/generate-configs.js +2 -0
- package/dist/commands/init.js +9 -9
- package/dist/commands/pipeline-action.d.ts +3 -0
- package/dist/commands/pipeline-action.js +13 -0
- package/dist/commands/pipeline.d.ts +2 -0
- package/dist/commands/pipeline.js +2 -0
- package/dist/commands/pr-comment.js +2 -0
- package/dist/commands/publish.js +2 -0
- package/dist/commands/remote-pipeline.d.ts +27 -0
- package/dist/commands/remote-pipeline.js +133 -0
- package/dist/commands/remote-results.d.ts +33 -0
- package/dist/commands/remote-results.js +97 -0
- package/dist/orchestration/build-app-context.js +3 -0
- package/dist/pipeline/map-request-to-config.js +2 -0
- package/package.json +2 -1
|
@@ -1,164 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* repo-validation.ts — Re-exports semantic validation from @sanity/ailf-tasks.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* -
|
|
6
|
-
*
|
|
7
|
-
* - Feature area strings are well-formed
|
|
8
|
-
* - Canonical doc slugs look reasonable (slugs, not URLs)
|
|
9
|
-
*
|
|
10
|
-
* These produce warnings, not errors — the pipeline can still run
|
|
11
|
-
* with imperfect tasks. Only structural failures (caught by Zod) block.
|
|
12
|
-
*
|
|
13
|
-
* @see packages/eval/src/adapters/task-sources/repo-schemas.ts
|
|
14
|
-
*/
|
|
15
|
-
import { CURATED_ASSERTION_TYPES, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
|
|
16
|
-
// ---------------------------------------------------------------------------
|
|
17
|
-
// Public API
|
|
18
|
-
// ---------------------------------------------------------------------------
|
|
19
|
-
/**
|
|
20
|
-
* Run semantic validation on an array of parsed repo tasks.
|
|
21
|
-
*
|
|
22
|
-
* Returns warnings for issues that don't block execution (unknown feature
|
|
23
|
-
* areas, unresolved slugs) and errors for issues that would cause pipeline
|
|
24
|
-
* failures (completely missing required fields — though Zod catches most).
|
|
25
|
-
*/
|
|
26
|
-
export function validateRepoTasks(tasks) {
|
|
27
|
-
const errors = [];
|
|
28
|
-
const warnings = [];
|
|
29
|
-
// Check for duplicate IDs
|
|
30
|
-
const seenIds = new Set();
|
|
31
|
-
for (const task of tasks) {
|
|
32
|
-
if (seenIds.has(task.id)) {
|
|
33
|
-
errors.push({
|
|
34
|
-
taskId: task.id,
|
|
35
|
-
field: "id",
|
|
36
|
-
message: `Duplicate task ID "${task.id}"`,
|
|
37
|
-
});
|
|
38
|
-
}
|
|
39
|
-
seenIds.add(task.id);
|
|
40
|
-
}
|
|
41
|
-
for (const task of tasks) {
|
|
42
|
-
// Check assertion types
|
|
43
|
-
for (let i = 0; i < task.assert.length; i++) {
|
|
44
|
-
const assertion = task.assert[i];
|
|
45
|
-
if (!CURATED_ASSERTION_TYPES.includes(assertion.type)) {
|
|
46
|
-
warnings.push({
|
|
47
|
-
taskId: task.id,
|
|
48
|
-
field: `assert[${i}].type`,
|
|
49
|
-
message: `Unknown assertion type "${assertion.type}". ` +
|
|
50
|
-
`Valid types: ${CURATED_ASSERTION_TYPES.join(", ")}`,
|
|
51
|
-
});
|
|
52
|
-
}
|
|
53
|
-
// Check rubric template for llm-rubric assertions
|
|
54
|
-
if (assertion.type === "llm-rubric" && "template" in assertion) {
|
|
55
|
-
const template = assertion.template;
|
|
56
|
-
if (!RUBRIC_TEMPLATE_NAMES.includes(template)) {
|
|
57
|
-
warnings.push({
|
|
58
|
-
taskId: task.id,
|
|
59
|
-
field: `assert[${i}].template`,
|
|
60
|
-
message: `Unknown rubric template "${template}". ` +
|
|
61
|
-
`Valid templates: ${RUBRIC_TEMPLATE_NAMES.join(", ")}`,
|
|
62
|
-
});
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
// Check canonical doc refs look reasonable
|
|
67
|
-
for (let i = 0; i < (task.canonicalDocs?.length ?? 0); i++) {
|
|
68
|
-
const doc = task.canonicalDocs[i];
|
|
69
|
-
// Slug refs: warn if they look like URLs or paths
|
|
70
|
-
if ("slug" in doc && !("id" in doc) && typeof doc.slug === "string") {
|
|
71
|
-
if (doc.slug.includes("/") || doc.slug.includes("http")) {
|
|
72
|
-
warnings.push({
|
|
73
|
-
taskId: task.id,
|
|
74
|
-
field: `canonicalDocs[${i}].slug`,
|
|
75
|
-
message: `Slug "${doc.slug}" looks like a URL or path — use 'path' type for paths or 'slug' for document slugs (e.g., "groq-introduction")`,
|
|
76
|
-
});
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
// Check task has at least one llm-rubric assertion (recommended but not required)
|
|
81
|
-
const hasLlmRubric = task.assert.some((a) => a.type === "llm-rubric");
|
|
82
|
-
if (!hasLlmRubric) {
|
|
83
|
-
warnings.push({
|
|
84
|
-
taskId: task.id,
|
|
85
|
-
field: "assert",
|
|
86
|
-
message: "No llm-rubric assertion found. Tasks should have at least one scored rubric for meaningful evaluation.",
|
|
87
|
-
});
|
|
88
|
-
}
|
|
89
|
-
// Check taskPrompt exists in vars (vars.task)
|
|
90
|
-
if (!task.vars?.task) {
|
|
91
|
-
warnings.push({
|
|
92
|
-
taskId: task.id,
|
|
93
|
-
field: "vars.task",
|
|
94
|
-
message: "No task prompt found in vars.task. The LLM will receive an empty implementation request.",
|
|
95
|
-
});
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
return {
|
|
99
|
-
valid: errors.length === 0,
|
|
100
|
-
errors,
|
|
101
|
-
warnings,
|
|
102
|
-
};
|
|
103
|
-
}
|
|
104
|
-
/**
|
|
105
|
-
* Format validation results for console output.
|
|
106
|
-
*/
|
|
107
|
-
export function formatValidationResult(result) {
|
|
108
|
-
const lines = [];
|
|
109
|
-
if (result.errors.length > 0) {
|
|
110
|
-
lines.push("❌ Errors:");
|
|
111
|
-
for (const e of result.errors) {
|
|
112
|
-
lines.push(` [${e.taskId}] ${e.field}: ${e.message}`);
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
if (result.warnings.length > 0) {
|
|
116
|
-
lines.push("⚠️ Warnings:");
|
|
117
|
-
for (const w of result.warnings) {
|
|
118
|
-
lines.push(` [${w.taskId}] ${w.field}: ${w.message}`);
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
if (result.valid && result.warnings.length === 0) {
|
|
122
|
-
lines.push("✅ All repo tasks pass validation");
|
|
123
|
-
}
|
|
124
|
-
return lines.join("\n");
|
|
125
|
-
}
|
|
126
|
-
// ---------------------------------------------------------------------------
|
|
127
|
-
// Snake_case detection (pre-parse helper)
|
|
128
|
-
// ---------------------------------------------------------------------------
|
|
129
|
-
/** Known snake_case → camelCase field mappings for common errors */
|
|
130
|
-
const SNAKE_TO_CAMEL = {
|
|
131
|
-
feature_area: "featureArea",
|
|
132
|
-
canonical_docs: "canonicalDocs",
|
|
133
|
-
doc_coverage: "docCoverage",
|
|
134
|
-
reference_solution: "referenceSolution",
|
|
135
|
-
};
|
|
136
|
-
/**
|
|
137
|
-
* Detect snake_case field names in raw task YAML data.
|
|
138
|
-
*
|
|
139
|
-
* This runs BEFORE Zod parsing to provide a user-friendly error message
|
|
140
|
-
* when authors use framework-internal snake_case names instead of the
|
|
141
|
-
* camelCase names expected in repo task files.
|
|
142
|
-
*
|
|
143
|
-
* @param raw - Raw parsed YAML (before Zod validation)
|
|
144
|
-
* @param filename - Source filename for error messages
|
|
145
|
-
* @returns Array of warning messages (empty if no issues)
|
|
4
|
+
* The validation logic is the single source of truth in @sanity/ailf-tasks.
|
|
5
|
+
* This file re-exports so existing eval-package importers don't need
|
|
6
|
+
* to change their import paths.
|
|
146
7
|
*/
|
|
147
|
-
export function detectSnakeCaseFields(raw, filename) {
|
|
148
|
-
const warnings = [];
|
|
149
|
-
if (!Array.isArray(raw))
|
|
150
|
-
return warnings;
|
|
151
|
-
for (let i = 0; i < raw.length; i++) {
|
|
152
|
-
const entry = raw[i];
|
|
153
|
-
if (typeof entry !== "object" || entry === null)
|
|
154
|
-
continue;
|
|
155
|
-
const obj = entry;
|
|
156
|
-
const taskId = typeof obj.id === "string" ? obj.id : `task[${i}]`;
|
|
157
|
-
for (const [snake, camel] of Object.entries(SNAKE_TO_CAMEL)) {
|
|
158
|
-
if (snake in obj) {
|
|
159
|
-
warnings.push(`[${filename}] ${taskId}: Found "${snake}" — repo tasks use camelCase. Did you mean "${camel}"?`);
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
return warnings;
|
|
164
|
-
}
|
|
8
|
+
export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, } from "../../_vendor/ailf-tasks/index.js";
|
|
@@ -691,6 +691,12 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
|
|
|
691
691
|
threshold: raw.threshold,
|
|
692
692
|
url: raw.url ?? [],
|
|
693
693
|
urls: raw.urls ?? [],
|
|
694
|
+
remote: raw.remote ?? false,
|
|
695
|
+
apiUrl: raw.apiUrl,
|
|
696
|
+
repoTasksPath: raw.repoTasksPath,
|
|
697
|
+
taskSource: raw.taskSource,
|
|
698
|
+
remoteCache: raw.remoteCache,
|
|
699
|
+
config: raw.config,
|
|
694
700
|
};
|
|
695
701
|
const resolved = computeResolvedOptions(withDefaults);
|
|
696
702
|
const planOpts = {
|
package/dist/commands/init.js
CHANGED
|
@@ -153,15 +153,15 @@ async function runInit(opts) {
|
|
|
153
153
|
console.log();
|
|
154
154
|
console.log(" Next steps:");
|
|
155
155
|
console.log();
|
|
156
|
-
console.log(` 1.
|
|
157
|
-
console.log("
|
|
158
|
-
console.log("
|
|
159
|
-
console.log("
|
|
160
|
-
console.log("
|
|
156
|
+
console.log(` 1. Edit the example tasks in ${rel(targetDir, tasksDir)}/ — update`);
|
|
157
|
+
console.log(" slugs and prompts for your documentation");
|
|
158
|
+
console.log(" 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/");
|
|
159
|
+
console.log(" 3. Add AILF_API_KEY as a GitHub Actions secret");
|
|
160
|
+
console.log(" (Settings → Secrets and variables → Actions)");
|
|
161
|
+
console.log(" 4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
|
|
162
|
+
console.log(" automatically on PRs");
|
|
161
163
|
console.log();
|
|
162
|
-
console.log(" 💡
|
|
163
|
-
console.log();
|
|
164
|
-
console.log(" brew install 1password-cli # if not already installed");
|
|
165
|
-
console.log(' op read "op://Shared/AI Literacy Framework - Shared API Tokens/AILF_API_KEY_DEV"');
|
|
164
|
+
console.log(" 💡 Test locally before pushing:");
|
|
165
|
+
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
|
|
166
166
|
console.log();
|
|
167
167
|
}
|
|
@@ -48,10 +48,13 @@ export interface ResolvedOptions {
|
|
|
48
48
|
skipFetch: boolean;
|
|
49
49
|
source?: string;
|
|
50
50
|
studioOriginOverride?: string;
|
|
51
|
+
remote: boolean;
|
|
51
52
|
repoTasksPath?: string;
|
|
52
53
|
taskOption?: string;
|
|
53
54
|
taskSourceType?: "content-lake" | "yaml";
|
|
54
55
|
urlArgs: string[];
|
|
56
|
+
apiUrl: string;
|
|
57
|
+
apiKey?: string;
|
|
55
58
|
}
|
|
56
59
|
/**
|
|
57
60
|
* Pure option resolution — computes ResolvedOptions from CLI flags without
|
|
@@ -182,8 +182,14 @@ export function computeResolvedOptions(opts) {
|
|
|
182
182
|
process.env.AILF_REPORT_PROJECT_ID ??
|
|
183
183
|
repoConfig?.reportStore?.projectId ??
|
|
184
184
|
undefined;
|
|
185
|
+
// Remote mode
|
|
186
|
+
const remote = opts.remote || process.env.AILF_REMOTE === "1";
|
|
187
|
+
const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
|
|
188
|
+
const apiKey = process.env.AILF_API_KEY ?? undefined;
|
|
185
189
|
return {
|
|
186
190
|
allowedOriginArgs,
|
|
191
|
+
apiKey,
|
|
192
|
+
apiUrl,
|
|
187
193
|
areaOption,
|
|
188
194
|
beforeOption,
|
|
189
195
|
changedDocsOption,
|
|
@@ -209,6 +215,7 @@ export function computeResolvedOptions(opts) {
|
|
|
209
215
|
publishEnabled,
|
|
210
216
|
publishTag: opts.publishTag,
|
|
211
217
|
readinessEnabled: opts.readiness,
|
|
218
|
+
remote,
|
|
212
219
|
reportDataset,
|
|
213
220
|
reportProjectId,
|
|
214
221
|
sanityDocumentArgs,
|
|
@@ -270,6 +277,12 @@ export async function executePipeline(cliOpts) {
|
|
|
270
277
|
process.exit(result.success ? 0 : 1);
|
|
271
278
|
}
|
|
272
279
|
const o = resolveOptions(cliOpts);
|
|
280
|
+
// Remote mode — submit to AILF API instead of running locally
|
|
281
|
+
if (o.remote) {
|
|
282
|
+
const { runRemotePipeline } = await import("./remote-pipeline.js");
|
|
283
|
+
await runRemotePipeline(o, ROOT);
|
|
284
|
+
return;
|
|
285
|
+
}
|
|
273
286
|
// Dry-run: validate only, don't execute steps
|
|
274
287
|
if (o.dryRun) {
|
|
275
288
|
const { validateConfiguration } = await import("../pipeline/validate.js");
|
|
@@ -52,11 +52,13 @@ export interface PipelineCliOptions {
|
|
|
52
52
|
skipEval: boolean;
|
|
53
53
|
skipFetch: boolean;
|
|
54
54
|
source?: string;
|
|
55
|
+
remote: boolean;
|
|
55
56
|
repoTasksPath?: string;
|
|
56
57
|
task?: string;
|
|
57
58
|
taskSource?: string;
|
|
58
59
|
threshold?: number;
|
|
59
60
|
url: string[];
|
|
60
61
|
urls: string[];
|
|
62
|
+
apiUrl?: string;
|
|
61
63
|
}
|
|
62
64
|
export declare function createPipelineCommand(): Command;
|
|
@@ -41,6 +41,8 @@ export function createPipelineCommand() {
|
|
|
41
41
|
.option("--promptfoo-url <url>", "Promptfoo share URL for report")
|
|
42
42
|
.option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), yaml (tasks/*.yaml files, legacy)", "content-lake")
|
|
43
43
|
.option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
|
|
44
|
+
.option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
|
|
45
|
+
.option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
|
|
44
46
|
.action(async (opts) => {
|
|
45
47
|
const { executePipeline } = await import("./pipeline-action.js");
|
|
46
48
|
await executePipeline(opts);
|
package/dist/commands/publish.js
CHANGED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* remote-pipeline.ts — Remote execution flow for `ailf pipeline --remote`.
|
|
3
|
+
*
|
|
4
|
+
* Reads local `.ailf/tasks/` YAML, validates locally via Zod schemas,
|
|
5
|
+
* submits to the AILF API, polls for completion, and writes the same
|
|
6
|
+
* output artifacts as local mode (score-summary.json, report.md,
|
|
7
|
+
* job-metadata.json).
|
|
8
|
+
*
|
|
9
|
+
* This module is the CLI-side counterpart of the API's POST /v1/pipeline
|
|
10
|
+
* endpoint. The CLI and API are published from the same monorepo, so the
|
|
11
|
+
* request/response shapes are always in sync.
|
|
12
|
+
*
|
|
13
|
+
* @see packages/eval/src/adapters/api-client/ — HTTP client + request builder
|
|
14
|
+
* @see docs/design-docs/cli-as-api-client.md — design doc
|
|
15
|
+
*/
|
|
16
|
+
import type { ResolvedOptions } from "./pipeline-action.js";
|
|
17
|
+
/**
|
|
18
|
+
* Run the evaluation pipeline in remote mode.
|
|
19
|
+
*
|
|
20
|
+
* 1. Validate we have an API key
|
|
21
|
+
* 2. Find and validate local tasks (fail-fast with Zod errors)
|
|
22
|
+
* 3. Build the PipelineRequest payload
|
|
23
|
+
* 4. Submit to the AILF API
|
|
24
|
+
* 5. Poll for completion with progress display
|
|
25
|
+
* 6. Write output artifacts (score-summary.json, report.md, job-metadata.json)
|
|
26
|
+
*/
|
|
27
|
+
export declare function runRemotePipeline(opts: ResolvedOptions, rootDir: string): Promise<void>;
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* remote-pipeline.ts — Remote execution flow for `ailf pipeline --remote`.
|
|
3
|
+
*
|
|
4
|
+
* Reads local `.ailf/tasks/` YAML, validates locally via Zod schemas,
|
|
5
|
+
* submits to the AILF API, polls for completion, and writes the same
|
|
6
|
+
* output artifacts as local mode (score-summary.json, report.md,
|
|
7
|
+
* job-metadata.json).
|
|
8
|
+
*
|
|
9
|
+
* This module is the CLI-side counterpart of the API's POST /v1/pipeline
|
|
10
|
+
* endpoint. The CLI and API are published from the same monorepo, so the
|
|
11
|
+
* request/response shapes are always in sync.
|
|
12
|
+
*
|
|
13
|
+
* @see packages/eval/src/adapters/api-client/ — HTTP client + request builder
|
|
14
|
+
* @see docs/design-docs/cli-as-api-client.md — design doc
|
|
15
|
+
*/
|
|
16
|
+
import { ZodError } from "zod";
|
|
17
|
+
import { ApiClient, buildRemoteRequest, createProgressDisplay, formatJobError, resolveTasksDir, } from "../adapters/api-client/index.js";
|
|
18
|
+
import { writeRemoteResults } from "./remote-results.js";
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Public API
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
/**
|
|
23
|
+
* Run the evaluation pipeline in remote mode.
|
|
24
|
+
*
|
|
25
|
+
* 1. Validate we have an API key
|
|
26
|
+
* 2. Find and validate local tasks (fail-fast with Zod errors)
|
|
27
|
+
* 3. Build the PipelineRequest payload
|
|
28
|
+
* 4. Submit to the AILF API
|
|
29
|
+
* 5. Poll for completion with progress display
|
|
30
|
+
* 6. Write output artifacts (score-summary.json, report.md, job-metadata.json)
|
|
31
|
+
*/
|
|
32
|
+
export async function runRemotePipeline(opts, rootDir) {
|
|
33
|
+
// 1. Validate API key
|
|
34
|
+
if (!opts.apiKey) {
|
|
35
|
+
console.error("❌ AILF_API_KEY is required for remote evaluation.");
|
|
36
|
+
console.error("");
|
|
37
|
+
console.error(" Set it in your environment:");
|
|
38
|
+
console.error(" export AILF_API_KEY=ailf_live_sk_...");
|
|
39
|
+
process.exit(2);
|
|
40
|
+
}
|
|
41
|
+
const client = new ApiClient({
|
|
42
|
+
apiKey: opts.apiKey,
|
|
43
|
+
baseUrl: opts.apiUrl,
|
|
44
|
+
});
|
|
45
|
+
// 2. Find tasks directory
|
|
46
|
+
const tasksDir = resolveTasksDir(rootDir, opts.repoTasksPath);
|
|
47
|
+
// 3. Build request from local state (validates locally first)
|
|
48
|
+
const configSlice = toConfigSlice(opts);
|
|
49
|
+
let request;
|
|
50
|
+
let taskCount;
|
|
51
|
+
try {
|
|
52
|
+
const result = await buildRemoteRequest({
|
|
53
|
+
tasksDir,
|
|
54
|
+
config: configSlice,
|
|
55
|
+
});
|
|
56
|
+
request = result.request;
|
|
57
|
+
taskCount = result.taskCount;
|
|
58
|
+
}
|
|
59
|
+
catch (err) {
|
|
60
|
+
if (err instanceof ZodError) {
|
|
61
|
+
console.error("❌ Task validation failed:\n");
|
|
62
|
+
for (const issue of err.issues) {
|
|
63
|
+
console.error(` ${issue.path.join(".")}: ${issue.message}`);
|
|
64
|
+
}
|
|
65
|
+
console.error("");
|
|
66
|
+
console.error("💡 Fix the issues above in your .ailf/tasks/ YAML files.");
|
|
67
|
+
process.exit(2);
|
|
68
|
+
}
|
|
69
|
+
throw err;
|
|
70
|
+
}
|
|
71
|
+
console.log(`📦 Found ${taskCount} task(s) in ${tasksDir}`);
|
|
72
|
+
// 4. Submit to API
|
|
73
|
+
const submission = await client.submitPipeline(request);
|
|
74
|
+
console.log(`🚀 Submitted job: ${submission.jobId}`);
|
|
75
|
+
if (submission.estimatedDurationMs) {
|
|
76
|
+
const mins = Math.ceil(submission.estimatedDurationMs / 60_000);
|
|
77
|
+
console.log(`⏱️ Estimated duration: ~${mins} minute(s)`);
|
|
78
|
+
}
|
|
79
|
+
// 5. Poll for completion with progress display
|
|
80
|
+
const job = await client.waitForCompletion(submission.jobId, {
|
|
81
|
+
onProgress: createProgressDisplay(),
|
|
82
|
+
});
|
|
83
|
+
// 6. Handle result
|
|
84
|
+
if (job.status !== "completed") {
|
|
85
|
+
console.error("");
|
|
86
|
+
console.error(formatJobError(job));
|
|
87
|
+
process.exit(1);
|
|
88
|
+
}
|
|
89
|
+
// 7. Fetch and write output artifacts
|
|
90
|
+
await writeRemoteResults(client, job, {
|
|
91
|
+
rootDir,
|
|
92
|
+
outputPath: opts.outputPath,
|
|
93
|
+
apiUrl: opts.apiUrl,
|
|
94
|
+
});
|
|
95
|
+
}
|
|
96
|
+
// ---------------------------------------------------------------------------
|
|
97
|
+
// Helpers
|
|
98
|
+
// ---------------------------------------------------------------------------
|
|
99
|
+
/**
|
|
100
|
+
* Extract the subset of ResolvedOptions needed for building a remote request.
|
|
101
|
+
* This mapping keeps remote-pipeline.ts decoupled from the full ResolvedConfig.
|
|
102
|
+
*/
|
|
103
|
+
function toConfigSlice(opts) {
|
|
104
|
+
return {
|
|
105
|
+
mode: opts.mode,
|
|
106
|
+
debug: opts.debug,
|
|
107
|
+
areas: opts.areaOption
|
|
108
|
+
?.split(",")
|
|
109
|
+
.map((s) => s.trim())
|
|
110
|
+
.filter(Boolean),
|
|
111
|
+
tasks: opts.taskOption
|
|
112
|
+
?.split(",")
|
|
113
|
+
.map((s) => s.trim())
|
|
114
|
+
.filter(Boolean),
|
|
115
|
+
changedDocs: opts.changedDocsOption
|
|
116
|
+
?.split(",")
|
|
117
|
+
.map((s) => s.trim())
|
|
118
|
+
.filter(Boolean),
|
|
119
|
+
source: opts.source,
|
|
120
|
+
compareEnabled: opts.compareEnabled,
|
|
121
|
+
compareThreshold: opts.compareThreshold,
|
|
122
|
+
publishEnabled: opts.publishEnabled,
|
|
123
|
+
publishTag: opts.publishTag,
|
|
124
|
+
concurrency: opts.concurrency,
|
|
125
|
+
datasetOverride: opts.datasetOverride,
|
|
126
|
+
projectIdOverride: opts.projectIdOverride,
|
|
127
|
+
perspectiveOverride: opts.perspectiveOverride,
|
|
128
|
+
graderReplications: opts.graderReplications,
|
|
129
|
+
gapAnalysisEnabled: opts.gapAnalysisEnabled,
|
|
130
|
+
readinessEnabled: opts.readinessEnabled,
|
|
131
|
+
discoveryReportEnabled: opts.discoveryReportEnabled,
|
|
132
|
+
};
|
|
133
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* remote-results.ts — Write output artifacts from a remote evaluation.
|
|
3
|
+
*
|
|
4
|
+
* Produces the same file layout as local mode so downstream tools
|
|
5
|
+
* (workflow PR comments, score comparison, baseline save) work unchanged:
|
|
6
|
+
*
|
|
7
|
+
* results/latest/score-summary.json — scores by area + overall
|
|
8
|
+
* results/latest/report.md — rendered markdown report
|
|
9
|
+
* results/latest/job-metadata.json — job ID, timing, API URL
|
|
10
|
+
*
|
|
11
|
+
* @see packages/eval/src/commands/remote-pipeline.ts — caller
|
|
12
|
+
*/
|
|
13
|
+
import type { ApiClient } from "../adapters/api-client/api-client.js";
|
|
14
|
+
import type { JobResponse } from "../adapters/api-client/types.js";
|
|
15
|
+
/** Options for writing remote results. */
|
|
16
|
+
export interface WriteResultsOptions {
|
|
17
|
+
/** Eval package root directory (for results/latest/ path). */
|
|
18
|
+
rootDir: string;
|
|
19
|
+
/** Optional output path override (--output flag). */
|
|
20
|
+
outputPath?: string;
|
|
21
|
+
/** API base URL (for metadata). */
|
|
22
|
+
apiUrl: string;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Fetch report artifacts from the API and write them to disk.
|
|
26
|
+
*
|
|
27
|
+
* Writes:
|
|
28
|
+
* - `results/latest/score-summary.json` — score data from job response
|
|
29
|
+
* - `results/latest/report.md` — full markdown report (if reportId present)
|
|
30
|
+
* - `results/latest/job-metadata.json` — job tracking info
|
|
31
|
+
* - `--output` path — markdown report (if specified)
|
|
32
|
+
*/
|
|
33
|
+
export declare function writeRemoteResults(client: ApiClient, job: JobResponse, options: WriteResultsOptions): Promise<void>;
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* remote-results.ts — Write output artifacts from a remote evaluation.
|
|
3
|
+
*
|
|
4
|
+
* Produces the same file layout as local mode so downstream tools
|
|
5
|
+
* (workflow PR comments, score comparison, baseline save) work unchanged:
|
|
6
|
+
*
|
|
7
|
+
* results/latest/score-summary.json — scores by area + overall
|
|
8
|
+
* results/latest/report.md — rendered markdown report
|
|
9
|
+
* results/latest/job-metadata.json — job ID, timing, API URL
|
|
10
|
+
*
|
|
11
|
+
* @see packages/eval/src/commands/remote-pipeline.ts — caller
|
|
12
|
+
*/
|
|
13
|
+
import { mkdirSync, writeFileSync } from "fs";
|
|
14
|
+
import { resolve } from "path";
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Public API
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
/**
|
|
19
|
+
* Fetch report artifacts from the API and write them to disk.
|
|
20
|
+
*
|
|
21
|
+
* Writes:
|
|
22
|
+
* - `results/latest/score-summary.json` — score data from job response
|
|
23
|
+
* - `results/latest/report.md` — full markdown report (if reportId present)
|
|
24
|
+
* - `results/latest/job-metadata.json` — job tracking info
|
|
25
|
+
* - `--output` path — markdown report (if specified)
|
|
26
|
+
*/
|
|
27
|
+
export async function writeRemoteResults(client, job, options) {
|
|
28
|
+
const resultsDir = resolve(options.rootDir, "results", "latest");
|
|
29
|
+
mkdirSync(resultsDir, { recursive: true });
|
|
30
|
+
// 1. Write score summary
|
|
31
|
+
const scoreSummary = buildScoreSummary(job);
|
|
32
|
+
writeFileSync(resolve(resultsDir, "score-summary.json"), JSON.stringify(scoreSummary, null, 2));
|
|
33
|
+
// 2. Fetch and write markdown report
|
|
34
|
+
let reportWritten = false;
|
|
35
|
+
if (job.reportId) {
|
|
36
|
+
try {
|
|
37
|
+
const markdown = await client.getReportMarkdown(job.reportId);
|
|
38
|
+
writeFileSync(resolve(resultsDir, "report.md"), markdown);
|
|
39
|
+
reportWritten = true;
|
|
40
|
+
// Also write to --output path if specified
|
|
41
|
+
if (options.outputPath) {
|
|
42
|
+
writeFileSync(options.outputPath, markdown);
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
catch (err) {
|
|
46
|
+
console.warn(` ⚠️ Could not fetch report: ${err instanceof Error ? err.message : String(err)}`);
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
// 3. Write job metadata
|
|
50
|
+
writeFileSync(resolve(resultsDir, "job-metadata.json"), JSON.stringify({
|
|
51
|
+
jobId: job.jobId,
|
|
52
|
+
status: job.status,
|
|
53
|
+
startedAt: job.startedAt ?? null,
|
|
54
|
+
completedAt: job.completedAt ?? null,
|
|
55
|
+
reportId: job.reportId ?? null,
|
|
56
|
+
reportUrl: job.reportUrl ?? null,
|
|
57
|
+
execution: job.execution ?? null,
|
|
58
|
+
apiUrl: options.apiUrl,
|
|
59
|
+
}, null, 2));
|
|
60
|
+
// 4. Print summary
|
|
61
|
+
console.log("");
|
|
62
|
+
console.log(`✅ Evaluation completed`);
|
|
63
|
+
console.log(` 📊 Results: ${resolve(resultsDir, "score-summary.json")}`);
|
|
64
|
+
if (reportWritten) {
|
|
65
|
+
console.log(` 📝 Report: ${resolve(resultsDir, "report.md")}`);
|
|
66
|
+
}
|
|
67
|
+
if (options.outputPath && reportWritten) {
|
|
68
|
+
console.log(` 📄 Output: ${options.outputPath}`);
|
|
69
|
+
}
|
|
70
|
+
if (job.reportUrl) {
|
|
71
|
+
console.log(` 🔗 Studio: ${job.reportUrl}`);
|
|
72
|
+
}
|
|
73
|
+
console.log(` 🏷️ Job ID: ${job.jobId}`);
|
|
74
|
+
}
|
|
75
|
+
// ---------------------------------------------------------------------------
|
|
76
|
+
// Helpers
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
/**
|
|
79
|
+
* Build a score summary object from the job response.
|
|
80
|
+
*
|
|
81
|
+
* The job response may contain a full scoreSummary (if the API includes it)
|
|
82
|
+
* or just minimal data. We build a structure that's compatible with the
|
|
83
|
+
* local pipeline's score-summary.json format.
|
|
84
|
+
*/
|
|
85
|
+
function buildScoreSummary(job) {
|
|
86
|
+
// The job response from the API may include inline score data in the
|
|
87
|
+
// future. For now, we store what we have — the job metadata — so
|
|
88
|
+
// downstream tools can at least read the file and know a remote eval
|
|
89
|
+
// completed.
|
|
90
|
+
return {
|
|
91
|
+
_remote: true,
|
|
92
|
+
jobId: job.jobId,
|
|
93
|
+
status: job.status,
|
|
94
|
+
reportId: job.reportId ?? null,
|
|
95
|
+
completedAt: job.completedAt ?? null,
|
|
96
|
+
};
|
|
97
|
+
}
|
|
@@ -69,6 +69,9 @@ export function mapToResolvedConfig(opts, rootDir) {
|
|
|
69
69
|
repoTasksPath: opts.repoTasksPath,
|
|
70
70
|
reportStoreProjectId: opts.reportProjectId,
|
|
71
71
|
reportStoreDataset: opts.reportDataset,
|
|
72
|
+
remote: opts.remote ?? false,
|
|
73
|
+
apiUrl: opts.apiUrl ?? "https://ailf-api.sanity.build",
|
|
74
|
+
apiKey: opts.apiKey,
|
|
72
75
|
};
|
|
73
76
|
}
|
|
74
77
|
/**
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sanity/ailf",
|
|
3
|
-
"version": "0.1.5",
|
|
3
|
+
"version": "0.1.7",
|
|
4
4
|
"private": false,
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "restricted"
|
|
@@ -64,6 +64,7 @@
|
|
|
64
64
|
"devDependencies": {
|
|
65
65
|
"@sanity/ailf-core": "workspace:*",
|
|
66
66
|
"@sanity/ailf-shared": "workspace:*",
|
|
67
|
+
"@sanity/ailf-tasks": "workspace:*",
|
|
67
68
|
"@types/js-yaml": "^4.0.9",
|
|
68
69
|
"@types/node": "^22.13.1",
|
|
69
70
|
"tsx": "^4.19.2",
|