@sanity/ailf 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
  2. package/dist/_vendor/ailf-core/examples/index.js +1 -1
  3. package/dist/_vendor/ailf-core/ports/context.d.ts +6 -0
  4. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -53
  5. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -2
  6. package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
  7. package/dist/_vendor/ailf-tasks/cli.js +61 -0
  8. package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
  9. package/dist/_vendor/ailf-tasks/index.js +16 -0
  10. package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
  11. package/dist/_vendor/ailf-tasks/parser.js +73 -0
  12. package/dist/_vendor/ailf-tasks/schemas.d.ts +186 -0
  13. package/dist/_vendor/ailf-tasks/schemas.js +176 -0
  14. package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
  15. package/dist/_vendor/ailf-tasks/validation.js +162 -0
  16. package/dist/adapters/api-client/api-client.d.ts +75 -0
  17. package/dist/adapters/api-client/api-client.js +201 -0
  18. package/dist/adapters/api-client/build-request.d.ts +75 -0
  19. package/dist/adapters/api-client/build-request.js +176 -0
  20. package/dist/adapters/api-client/errors.d.ts +43 -0
  21. package/dist/adapters/api-client/errors.js +68 -0
  22. package/dist/adapters/api-client/format-error.d.ts +22 -0
  23. package/dist/adapters/api-client/format-error.js +48 -0
  24. package/dist/adapters/api-client/index.d.ts +13 -0
  25. package/dist/adapters/api-client/index.js +12 -0
  26. package/dist/adapters/api-client/progress.d.ts +26 -0
  27. package/dist/adapters/api-client/progress.js +69 -0
  28. package/dist/adapters/api-client/remediation.d.ts +19 -0
  29. package/dist/adapters/api-client/remediation.js +76 -0
  30. package/dist/adapters/api-client/types.d.ts +98 -0
  31. package/dist/adapters/api-client/types.js +14 -0
  32. package/dist/adapters/config-sources/file-config-adapter.js +2 -0
  33. package/dist/adapters/task-sources/repo-schemas.d.ts +16 -181
  34. package/dist/adapters/task-sources/repo-schemas.js +27 -184
  35. package/dist/adapters/task-sources/repo-validation.d.ts +5 -46
  36. package/dist/adapters/task-sources/repo-validation.js +5 -161
  37. package/dist/commands/calculate-scores.js +2 -0
  38. package/dist/commands/explain-handler.js +6 -0
  39. package/dist/commands/fetch-docs.js +2 -0
  40. package/dist/commands/generate-configs.js +2 -0
  41. package/dist/commands/init.js +9 -9
  42. package/dist/commands/pipeline-action.d.ts +3 -0
  43. package/dist/commands/pipeline-action.js +13 -0
  44. package/dist/commands/pipeline.d.ts +2 -0
  45. package/dist/commands/pipeline.js +2 -0
  46. package/dist/commands/pr-comment.js +2 -0
  47. package/dist/commands/publish.js +2 -0
  48. package/dist/commands/remote-pipeline.d.ts +27 -0
  49. package/dist/commands/remote-pipeline.js +133 -0
  50. package/dist/commands/remote-results.d.ts +33 -0
  51. package/dist/commands/remote-results.js +97 -0
  52. package/dist/orchestration/build-app-context.js +3 -0
  53. package/dist/pipeline/map-request-to-config.js +2 -0
  54. package/package.json +2 -1
@@ -1,164 +1,8 @@
1
1
  /**
2
- * Semantic validation for repo-based tasks.
2
+ * repo-validation.ts — Re-exports semantic validation from @sanity/ailf-tasks.
3
3
  *
4
- * Checks that go beyond Zod schema parsing:
5
- * - Assertion types are in the curated set
6
- * - Rubric template names resolve to known templates
7
- * - Feature area strings are well-formed
8
- * - Canonical doc slugs look reasonable (slugs, not URLs)
9
- *
10
- * These produce warnings, not errors — the pipeline can still run
11
- * with imperfect tasks. Only structural failures (caught by Zod) block.
12
- *
13
- * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
14
- */
15
- import { CURATED_ASSERTION_TYPES, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
16
- // ---------------------------------------------------------------------------
17
- // Public API
18
- // ---------------------------------------------------------------------------
19
- /**
20
- * Run semantic validation on an array of parsed repo tasks.
21
- *
22
- * Returns warnings for issues that don't block execution (unknown feature
23
- * areas, unresolved slugs) and errors for issues that would cause pipeline
24
- * failures (completely missing required fields — though Zod catches most).
25
- */
26
- export function validateRepoTasks(tasks) {
27
- const errors = [];
28
- const warnings = [];
29
- // Check for duplicate IDs
30
- const seenIds = new Set();
31
- for (const task of tasks) {
32
- if (seenIds.has(task.id)) {
33
- errors.push({
34
- taskId: task.id,
35
- field: "id",
36
- message: `Duplicate task ID "${task.id}"`,
37
- });
38
- }
39
- seenIds.add(task.id);
40
- }
41
- for (const task of tasks) {
42
- // Check assertion types
43
- for (let i = 0; i < task.assert.length; i++) {
44
- const assertion = task.assert[i];
45
- if (!CURATED_ASSERTION_TYPES.includes(assertion.type)) {
46
- warnings.push({
47
- taskId: task.id,
48
- field: `assert[${i}].type`,
49
- message: `Unknown assertion type "${assertion.type}". ` +
50
- `Valid types: ${CURATED_ASSERTION_TYPES.join(", ")}`,
51
- });
52
- }
53
- // Check rubric template for llm-rubric assertions
54
- if (assertion.type === "llm-rubric" && "template" in assertion) {
55
- const template = assertion.template;
56
- if (!RUBRIC_TEMPLATE_NAMES.includes(template)) {
57
- warnings.push({
58
- taskId: task.id,
59
- field: `assert[${i}].template`,
60
- message: `Unknown rubric template "${template}". ` +
61
- `Valid templates: ${RUBRIC_TEMPLATE_NAMES.join(", ")}`,
62
- });
63
- }
64
- }
65
- }
66
- // Check canonical doc refs look reasonable
67
- for (let i = 0; i < (task.canonicalDocs?.length ?? 0); i++) {
68
- const doc = task.canonicalDocs[i];
69
- // Slug refs: warn if they look like URLs or paths
70
- if ("slug" in doc && !("id" in doc) && typeof doc.slug === "string") {
71
- if (doc.slug.includes("/") || doc.slug.includes("http")) {
72
- warnings.push({
73
- taskId: task.id,
74
- field: `canonicalDocs[${i}].slug`,
75
- message: `Slug "${doc.slug}" looks like a URL or path — use 'path' type for paths or 'slug' for document slugs (e.g., "groq-introduction")`,
76
- });
77
- }
78
- }
79
- }
80
- // Check task has at least one llm-rubric assertion (recommended but not required)
81
- const hasLlmRubric = task.assert.some((a) => a.type === "llm-rubric");
82
- if (!hasLlmRubric) {
83
- warnings.push({
84
- taskId: task.id,
85
- field: "assert",
86
- message: "No llm-rubric assertion found. Tasks should have at least one scored rubric for meaningful evaluation.",
87
- });
88
- }
89
- // Check taskPrompt exists in vars (vars.task)
90
- if (!task.vars?.task) {
91
- warnings.push({
92
- taskId: task.id,
93
- field: "vars.task",
94
- message: "No task prompt found in vars.task. The LLM will receive an empty implementation request.",
95
- });
96
- }
97
- }
98
- return {
99
- valid: errors.length === 0,
100
- errors,
101
- warnings,
102
- };
103
- }
104
- /**
105
- * Format validation results for console output.
106
- */
107
- export function formatValidationResult(result) {
108
- const lines = [];
109
- if (result.errors.length > 0) {
110
- lines.push("❌ Errors:");
111
- for (const e of result.errors) {
112
- lines.push(` [${e.taskId}] ${e.field}: ${e.message}`);
113
- }
114
- }
115
- if (result.warnings.length > 0) {
116
- lines.push("⚠️ Warnings:");
117
- for (const w of result.warnings) {
118
- lines.push(` [${w.taskId}] ${w.field}: ${w.message}`);
119
- }
120
- }
121
- if (result.valid && result.warnings.length === 0) {
122
- lines.push("✅ All repo tasks pass validation");
123
- }
124
- return lines.join("\n");
125
- }
126
- // ---------------------------------------------------------------------------
127
- // Snake_case detection (pre-parse helper)
128
- // ---------------------------------------------------------------------------
129
- /** Known snake_case → camelCase field mappings for common errors */
130
- const SNAKE_TO_CAMEL = {
131
- feature_area: "featureArea",
132
- canonical_docs: "canonicalDocs",
133
- doc_coverage: "docCoverage",
134
- reference_solution: "referenceSolution",
135
- };
136
- /**
137
- * Detect snake_case field names in raw task YAML data.
138
- *
139
- * This runs BEFORE Zod parsing to provide a user-friendly error message
140
- * when authors use framework-internal snake_case names instead of the
141
- * camelCase names expected in repo task files.
142
- *
143
- * @param raw - Raw parsed YAML (before Zod validation)
144
- * @param filename - Source filename for error messages
145
- * @returns Array of warning messages (empty if no issues)
4
+ * The validation logic is the single source of truth in @sanity/ailf-tasks.
5
+ * This file re-exports so existing eval-package importers don't need
6
+ * to change their import paths.
146
7
  */
147
- export function detectSnakeCaseFields(raw, filename) {
148
- const warnings = [];
149
- if (!Array.isArray(raw))
150
- return warnings;
151
- for (let i = 0; i < raw.length; i++) {
152
- const entry = raw[i];
153
- if (typeof entry !== "object" || entry === null)
154
- continue;
155
- const obj = entry;
156
- const taskId = typeof obj.id === "string" ? obj.id : `task[${i}]`;
157
- for (const [snake, camel] of Object.entries(SNAKE_TO_CAMEL)) {
158
- if (snake in obj) {
159
- warnings.push(`[${filename}] ${taskId}: Found "${snake}" — repo tasks use camelCase. Did you mean "${camel}"?`);
160
- }
161
- }
162
- }
163
- return warnings;
164
- }
8
+ export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, } from "../../_vendor/ailf-tasks/index.js";
@@ -32,6 +32,8 @@ export function createCalculateScoresCommand() {
32
32
  noRemoteCache: true,
33
33
  searchMode: "open",
34
34
  source: opts.source,
35
+ remote: false,
36
+ apiUrl: "https://ailf-api.sanity.build",
35
37
  });
36
38
  calculateAndWriteScores({
37
39
  resultsPath,
@@ -691,6 +691,12 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
691
691
  threshold: raw.threshold,
692
692
  url: raw.url ?? [],
693
693
  urls: raw.urls ?? [],
694
+ remote: raw.remote ?? false,
695
+ apiUrl: raw.apiUrl,
696
+ repoTasksPath: raw.repoTasksPath,
697
+ taskSource: raw.taskSource,
698
+ remoteCache: raw.remoteCache,
699
+ config: raw.config,
694
700
  };
695
701
  const resolved = computeResolvedOptions(withDefaults);
696
702
  const planOpts = {
@@ -53,6 +53,8 @@ async function executeFetchDocs(opts) {
53
53
  noRemoteCache: true,
54
54
  searchMode: "open",
55
55
  source: opts.source,
56
+ remote: false,
57
+ apiUrl: "https://ailf-api.sanity.build",
56
58
  });
57
59
  // Resolve source
58
60
  const overrides = configToSourceOverrides(ctx.config);
@@ -31,6 +31,8 @@ export function createGenerateConfigsCommand() {
31
31
  noRemoteCache: true,
32
32
  searchMode: "open",
33
33
  source: opts.source,
34
+ remote: false,
35
+ apiUrl: "https://ailf-api.sanity.build",
34
36
  });
35
37
  generateConfigs({
36
38
  rootDir: ctx.config.rootDir,
@@ -153,15 +153,15 @@ async function runInit(opts) {
153
153
  console.log();
154
154
  console.log(" Next steps:");
155
155
  console.log();
156
- console.log(` 1. Customize the example tasks in ${rel(targetDir, tasksDir)}/`);
157
- console.log(" 2. Validate: npx @sanity/ailf validate-tasks .ailf/tasks/");
158
- console.log(" 3. Set AILF_API_KEY in your environment (e.g. in a local .env file)");
159
- console.log(" and add it as a GitHub Actions secret (Settings → Secrets)");
160
- console.log(" 4. Push the workflow at .github/workflows/ailf-eval.yml handles the rest");
156
+ console.log(` 1. Edit the example tasks in ${rel(targetDir, tasksDir)}/ — update`);
157
+ console.log(" slugs and prompts for your documentation");
158
+ console.log(" 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/");
159
+ console.log(" 3. Add AILF_API_KEY as a GitHub Actions secret");
160
+ console.log(" (Settings Secrets and variables Actions)");
161
+ console.log(" 4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
162
+ console.log(" automatically on PRs");
161
163
  console.log();
162
- console.log(" 💡 Get an API key with the 1Password CLI:");
163
- console.log();
164
- console.log(" brew install 1password-cli # if not already installed");
165
- console.log(' op read "op://Shared/AI Literacy Framework - Shared API Tokens/AILF_API_KEY_DEV"');
164
+ console.log(" 💡 Test locally before pushing:");
165
+ console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
166
166
  console.log();
167
167
  }
@@ -48,10 +48,13 @@ export interface ResolvedOptions {
48
48
  skipFetch: boolean;
49
49
  source?: string;
50
50
  studioOriginOverride?: string;
51
+ remote: boolean;
51
52
  repoTasksPath?: string;
52
53
  taskOption?: string;
53
54
  taskSourceType?: "content-lake" | "yaml";
54
55
  urlArgs: string[];
56
+ apiUrl: string;
57
+ apiKey?: string;
55
58
  }
56
59
  /**
57
60
  * Pure option resolution — computes ResolvedOptions from CLI flags without
@@ -182,8 +182,14 @@ export function computeResolvedOptions(opts) {
182
182
  process.env.AILF_REPORT_PROJECT_ID ??
183
183
  repoConfig?.reportStore?.projectId ??
184
184
  undefined;
185
+ // Remote mode
186
+ const remote = opts.remote || process.env.AILF_REMOTE === "1";
187
+ const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
188
+ const apiKey = process.env.AILF_API_KEY ?? undefined;
185
189
  return {
186
190
  allowedOriginArgs,
191
+ apiKey,
192
+ apiUrl,
187
193
  areaOption,
188
194
  beforeOption,
189
195
  changedDocsOption,
@@ -209,6 +215,7 @@ export function computeResolvedOptions(opts) {
209
215
  publishEnabled,
210
216
  publishTag: opts.publishTag,
211
217
  readinessEnabled: opts.readiness,
218
+ remote,
212
219
  reportDataset,
213
220
  reportProjectId,
214
221
  sanityDocumentArgs,
@@ -270,6 +277,12 @@ export async function executePipeline(cliOpts) {
270
277
  process.exit(result.success ? 0 : 1);
271
278
  }
272
279
  const o = resolveOptions(cliOpts);
280
+ // Remote mode — submit to AILF API instead of running locally
281
+ if (o.remote) {
282
+ const { runRemotePipeline } = await import("./remote-pipeline.js");
283
+ await runRemotePipeline(o, ROOT);
284
+ return;
285
+ }
273
286
  // Dry-run: validate only, don't execute steps
274
287
  if (o.dryRun) {
275
288
  const { validateConfiguration } = await import("../pipeline/validate.js");
@@ -52,11 +52,13 @@ export interface PipelineCliOptions {
52
52
  skipEval: boolean;
53
53
  skipFetch: boolean;
54
54
  source?: string;
55
+ remote: boolean;
55
56
  repoTasksPath?: string;
56
57
  task?: string;
57
58
  taskSource?: string;
58
59
  threshold?: number;
59
60
  url: string[];
60
61
  urls: string[];
62
+ apiUrl?: string;
61
63
  }
62
64
  export declare function createPipelineCommand(): Command;
@@ -41,6 +41,8 @@ export function createPipelineCommand() {
41
41
  .option("--promptfoo-url <url>", "Promptfoo share URL for report")
42
42
  .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), yaml (tasks/*.yaml files, legacy)", "content-lake")
43
43
  .option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
44
+ .option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
45
+ .option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
44
46
  .action(async (opts) => {
45
47
  const { executePipeline } = await import("./pipeline-action.js");
46
48
  await executePipeline(opts);
@@ -31,6 +31,8 @@ export function createPrCommentCommand() {
31
31
  noCache: true,
32
32
  noRemoteCache: true,
33
33
  searchMode: "open",
34
+ remote: false,
35
+ apiUrl: "https://ailf-api.sanity.build",
34
36
  });
35
37
  generatePrComment({
36
38
  outputPath: opts.output,
@@ -92,6 +92,8 @@ async function runPublishCommand(summaryPath, opts) {
92
92
  searchMode: "open",
93
93
  skipEval: true,
94
94
  skipFetch: true,
95
+ remote: false,
96
+ apiUrl: "https://ailf-api.sanity.build",
95
97
  });
96
98
  const store = ctx.reportStore;
97
99
  const sinks = (ctx.sinks ?? []);
@@ -0,0 +1,27 @@
1
+ /**
2
+ * remote-pipeline.ts — Remote execution flow for `ailf pipeline --remote`.
3
+ *
4
+ * Reads local `.ailf/tasks/` YAML, validates locally via Zod schemas,
5
+ * submits to the AILF API, polls for completion, and writes the same
6
+ * output artifacts as local mode (score-summary.json, report.md,
7
+ * job-metadata.json).
8
+ *
9
+ * This module is the CLI-side counterpart of the API's POST /v1/pipeline
10
+ * endpoint. The CLI and API are published from the same monorepo, so the
11
+ * request/response shapes are always in sync.
12
+ *
13
+ * @see packages/eval/src/adapters/api-client/ — HTTP client + request builder
14
+ * @see docs/design-docs/cli-as-api-client.md — design doc
15
+ */
16
+ import type { ResolvedOptions } from "./pipeline-action.js";
17
+ /**
18
+ * Run the evaluation pipeline in remote mode.
19
+ *
20
+ * 1. Validate we have an API key
21
+ * 2. Find and validate local tasks (fail-fast with Zod errors)
22
+ * 3. Build the PipelineRequest payload
23
+ * 4. Submit to the AILF API
24
+ * 5. Poll for completion with progress display
25
+ * 6. Write output artifacts (score-summary.json, report.md, job-metadata.json)
26
+ */
27
+ export declare function runRemotePipeline(opts: ResolvedOptions, rootDir: string): Promise<void>;
@@ -0,0 +1,133 @@
1
+ /**
2
+ * remote-pipeline.ts — Remote execution flow for `ailf pipeline --remote`.
3
+ *
4
+ * Reads local `.ailf/tasks/` YAML, validates locally via Zod schemas,
5
+ * submits to the AILF API, polls for completion, and writes the same
6
+ * output artifacts as local mode (score-summary.json, report.md,
7
+ * job-metadata.json).
8
+ *
9
+ * This module is the CLI-side counterpart of the API's POST /v1/pipeline
10
+ * endpoint. The CLI and API are published from the same monorepo, so the
11
+ * request/response shapes are always in sync.
12
+ *
13
+ * @see packages/eval/src/adapters/api-client/ — HTTP client + request builder
14
+ * @see docs/design-docs/cli-as-api-client.md — design doc
15
+ */
16
+ import { ZodError } from "zod";
17
+ import { ApiClient, buildRemoteRequest, createProgressDisplay, formatJobError, resolveTasksDir, } from "../adapters/api-client/index.js";
18
+ import { writeRemoteResults } from "./remote-results.js";
19
+ // ---------------------------------------------------------------------------
20
+ // Public API
21
+ // ---------------------------------------------------------------------------
22
+ /**
23
+ * Run the evaluation pipeline in remote mode.
24
+ *
25
+ * 1. Validate we have an API key
26
+ * 2. Find and validate local tasks (fail-fast with Zod errors)
27
+ * 3. Build the PipelineRequest payload
28
+ * 4. Submit to the AILF API
29
+ * 5. Poll for completion with progress display
30
+ * 6. Write output artifacts (score-summary.json, report.md, job-metadata.json)
31
+ */
32
+ export async function runRemotePipeline(opts, rootDir) {
33
+ // 1. Validate API key
34
+ if (!opts.apiKey) {
35
+ console.error("❌ AILF_API_KEY is required for remote evaluation.");
36
+ console.error("");
37
+ console.error(" Set it in your environment:");
38
+ console.error(" export AILF_API_KEY=ailf_live_sk_...");
39
+ process.exit(2);
40
+ }
41
+ const client = new ApiClient({
42
+ apiKey: opts.apiKey,
43
+ baseUrl: opts.apiUrl,
44
+ });
45
+ // 2. Find tasks directory
46
+ const tasksDir = resolveTasksDir(rootDir, opts.repoTasksPath);
47
+ // 3. Build request from local state (validates locally first)
48
+ const configSlice = toConfigSlice(opts);
49
+ let request;
50
+ let taskCount;
51
+ try {
52
+ const result = await buildRemoteRequest({
53
+ tasksDir,
54
+ config: configSlice,
55
+ });
56
+ request = result.request;
57
+ taskCount = result.taskCount;
58
+ }
59
+ catch (err) {
60
+ if (err instanceof ZodError) {
61
+ console.error("❌ Task validation failed:\n");
62
+ for (const issue of err.issues) {
63
+ console.error(` ${issue.path.join(".")}: ${issue.message}`);
64
+ }
65
+ console.error("");
66
+ console.error("💡 Fix the issues above in your .ailf/tasks/ YAML files.");
67
+ process.exit(2);
68
+ }
69
+ throw err;
70
+ }
71
+ console.log(`📦 Found ${taskCount} task(s) in ${tasksDir}`);
72
+ // 4. Submit to API
73
+ const submission = await client.submitPipeline(request);
74
+ console.log(`🚀 Submitted job: ${submission.jobId}`);
75
+ if (submission.estimatedDurationMs) {
76
+ const mins = Math.ceil(submission.estimatedDurationMs / 60_000);
77
+ console.log(`⏱️ Estimated duration: ~${mins} minute(s)`);
78
+ }
79
+ // 5. Poll for completion with progress display
80
+ const job = await client.waitForCompletion(submission.jobId, {
81
+ onProgress: createProgressDisplay(),
82
+ });
83
+ // 6. Handle result
84
+ if (job.status !== "completed") {
85
+ console.error("");
86
+ console.error(formatJobError(job));
87
+ process.exit(1);
88
+ }
89
+ // 7. Fetch and write output artifacts
90
+ await writeRemoteResults(client, job, {
91
+ rootDir,
92
+ outputPath: opts.outputPath,
93
+ apiUrl: opts.apiUrl,
94
+ });
95
+ }
96
+ // ---------------------------------------------------------------------------
97
+ // Helpers
98
+ // ---------------------------------------------------------------------------
99
+ /**
100
+ * Extract the subset of ResolvedOptions needed for building a remote request.
101
+ * This mapping keeps remote-pipeline.ts decoupled from the full ResolvedConfig.
102
+ */
103
+ function toConfigSlice(opts) {
104
+ return {
105
+ mode: opts.mode,
106
+ debug: opts.debug,
107
+ areas: opts.areaOption
108
+ ?.split(",")
109
+ .map((s) => s.trim())
110
+ .filter(Boolean),
111
+ tasks: opts.taskOption
112
+ ?.split(",")
113
+ .map((s) => s.trim())
114
+ .filter(Boolean),
115
+ changedDocs: opts.changedDocsOption
116
+ ?.split(",")
117
+ .map((s) => s.trim())
118
+ .filter(Boolean),
119
+ source: opts.source,
120
+ compareEnabled: opts.compareEnabled,
121
+ compareThreshold: opts.compareThreshold,
122
+ publishEnabled: opts.publishEnabled,
123
+ publishTag: opts.publishTag,
124
+ concurrency: opts.concurrency,
125
+ datasetOverride: opts.datasetOverride,
126
+ projectIdOverride: opts.projectIdOverride,
127
+ perspectiveOverride: opts.perspectiveOverride,
128
+ graderReplications: opts.graderReplications,
129
+ gapAnalysisEnabled: opts.gapAnalysisEnabled,
130
+ readinessEnabled: opts.readinessEnabled,
131
+ discoveryReportEnabled: opts.discoveryReportEnabled,
132
+ };
133
+ }
@@ -0,0 +1,33 @@
1
+ /**
2
+ * remote-results.ts — Write output artifacts from a remote evaluation.
3
+ *
4
+ * Produces the same file layout as local mode so downstream tools
5
+ * (workflow PR comments, score comparison, baseline save) work unchanged:
6
+ *
7
+ * results/latest/score-summary.json — scores by area + overall
8
+ * results/latest/report.md — rendered markdown report
9
+ * results/latest/job-metadata.json — job ID, timing, API URL
10
+ *
11
+ * @see packages/eval/src/commands/remote-pipeline.ts — caller
12
+ */
13
+ import type { ApiClient } from "../adapters/api-client/api-client.js";
14
+ import type { JobResponse } from "../adapters/api-client/types.js";
15
+ /** Options for writing remote results. */
16
+ export interface WriteResultsOptions {
17
+ /** Eval package root directory (for results/latest/ path). */
18
+ rootDir: string;
19
+ /** Optional output path override (--output flag). */
20
+ outputPath?: string;
21
+ /** API base URL (for metadata). */
22
+ apiUrl: string;
23
+ }
24
+ /**
25
+ * Fetch report artifacts from the API and write them to disk.
26
+ *
27
+ * Writes:
28
+ * - `results/latest/score-summary.json` — score data from job response
29
+ * - `results/latest/report.md` — full markdown report (if reportId present)
30
+ * - `results/latest/job-metadata.json` — job tracking info
31
+ * - `--output` path — markdown report (if specified)
32
+ */
33
+ export declare function writeRemoteResults(client: ApiClient, job: JobResponse, options: WriteResultsOptions): Promise<void>;
@@ -0,0 +1,97 @@
1
+ /**
2
+ * remote-results.ts — Write output artifacts from a remote evaluation.
3
+ *
4
+ * Produces the same file layout as local mode so downstream tools
5
+ * (workflow PR comments, score comparison, baseline save) work unchanged:
6
+ *
7
+ * results/latest/score-summary.json — scores by area + overall
8
+ * results/latest/report.md — rendered markdown report
9
+ * results/latest/job-metadata.json — job ID, timing, API URL
10
+ *
11
+ * @see packages/eval/src/commands/remote-pipeline.ts — caller
12
+ */
13
+ import { mkdirSync, writeFileSync } from "fs";
14
+ import { resolve } from "path";
15
+ // ---------------------------------------------------------------------------
16
+ // Public API
17
+ // ---------------------------------------------------------------------------
18
+ /**
19
+ * Fetch report artifacts from the API and write them to disk.
20
+ *
21
+ * Writes:
22
+ * - `results/latest/score-summary.json` — score data from job response
23
+ * - `results/latest/report.md` — full markdown report (if reportId present)
24
+ * - `results/latest/job-metadata.json` — job tracking info
25
+ * - `--output` path — markdown report (if specified)
26
+ */
27
+ export async function writeRemoteResults(client, job, options) {
28
+ const resultsDir = resolve(options.rootDir, "results", "latest");
29
+ mkdirSync(resultsDir, { recursive: true });
30
+ // 1. Write score summary
31
+ const scoreSummary = buildScoreSummary(job);
32
+ writeFileSync(resolve(resultsDir, "score-summary.json"), JSON.stringify(scoreSummary, null, 2));
33
+ // 2. Fetch and write markdown report
34
+ let reportWritten = false;
35
+ if (job.reportId) {
36
+ try {
37
+ const markdown = await client.getReportMarkdown(job.reportId);
38
+ writeFileSync(resolve(resultsDir, "report.md"), markdown);
39
+ reportWritten = true;
40
+ // Also write to --output path if specified
41
+ if (options.outputPath) {
42
+ writeFileSync(options.outputPath, markdown);
43
+ }
44
+ }
45
+ catch (err) {
46
+ console.warn(` ⚠️ Could not fetch report: ${err instanceof Error ? err.message : String(err)}`);
47
+ }
48
+ }
49
+ // 3. Write job metadata
50
+ writeFileSync(resolve(resultsDir, "job-metadata.json"), JSON.stringify({
51
+ jobId: job.jobId,
52
+ status: job.status,
53
+ startedAt: job.startedAt ?? null,
54
+ completedAt: job.completedAt ?? null,
55
+ reportId: job.reportId ?? null,
56
+ reportUrl: job.reportUrl ?? null,
57
+ execution: job.execution ?? null,
58
+ apiUrl: options.apiUrl,
59
+ }, null, 2));
60
+ // 4. Print summary
61
+ console.log("");
62
+ console.log(`✅ Evaluation completed`);
63
+ console.log(` 📊 Results: ${resolve(resultsDir, "score-summary.json")}`);
64
+ if (reportWritten) {
65
+ console.log(` 📝 Report: ${resolve(resultsDir, "report.md")}`);
66
+ }
67
+ if (options.outputPath && reportWritten) {
68
+ console.log(` 📄 Output: ${options.outputPath}`);
69
+ }
70
+ if (job.reportUrl) {
71
+ console.log(` 🔗 Studio: ${job.reportUrl}`);
72
+ }
73
+ console.log(` 🏷️ Job ID: ${job.jobId}`);
74
+ }
75
+ // ---------------------------------------------------------------------------
76
+ // Helpers
77
+ // ---------------------------------------------------------------------------
78
+ /**
79
+ * Build a score summary object from the job response.
80
+ *
81
+ * The job response may contain a full scoreSummary (if the API includes it)
82
+ * or just minimal data. We build a structure that's compatible with the
83
+ * local pipeline's score-summary.json format.
84
+ */
85
+ function buildScoreSummary(job) {
86
+ // The job response from the API may include inline score data in the
87
+ // future. For now, we store what we have — the job metadata — so
88
+ // downstream tools can at least read the file and know a remote eval
89
+ // completed.
90
+ return {
91
+ _remote: true,
92
+ jobId: job.jobId,
93
+ status: job.status,
94
+ reportId: job.reportId ?? null,
95
+ completedAt: job.completedAt ?? null,
96
+ };
97
+ }
@@ -69,6 +69,9 @@ export function mapToResolvedConfig(opts, rootDir) {
69
69
  repoTasksPath: opts.repoTasksPath,
70
70
  reportStoreProjectId: opts.reportProjectId,
71
71
  reportStoreDataset: opts.reportDataset,
72
+ remote: opts.remote ?? false,
73
+ apiUrl: opts.apiUrl ?? "https://ailf-api.sanity.build",
74
+ apiKey: opts.apiKey,
72
75
  };
73
76
  }
74
77
  /**
@@ -58,6 +58,8 @@ export function mapRequestToConfig(request, rootDir) {
58
58
  repoTasksPath: undefined,
59
59
  callback: request.callback,
60
60
  jobId: request.jobId,
61
+ remote: false,
62
+ apiUrl: "https://ailf-api.sanity.build",
61
63
  };
62
64
  }
63
65
  function mapDebug(debug) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "0.1.5",
3
+ "version": "0.1.7",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "restricted"
@@ -64,6 +64,7 @@
64
64
  "devDependencies": {
65
65
  "@sanity/ailf-core": "workspace:*",
66
66
  "@sanity/ailf-shared": "workspace:*",
67
+ "@sanity/ailf-tasks": "workspace:*",
67
68
  "@types/js-yaml": "^4.0.9",
68
69
  "@types/node": "^22.13.1",
69
70
  "tsx": "^4.19.2",