npm - @sanity/ailf - Versions diffs - 0.1.5 → 0.1.7 - Mend

@sanity/ailf 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54)

package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
package/dist/_vendor/ailf-core/examples/index.js +1 -1
package/dist/_vendor/ailf-core/ports/context.d.ts +6 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -53
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -2
package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
package/dist/_vendor/ailf-tasks/cli.js +61 -0
package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
package/dist/_vendor/ailf-tasks/index.js +16 -0
package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
package/dist/_vendor/ailf-tasks/parser.js +73 -0
package/dist/_vendor/ailf-tasks/schemas.d.ts +186 -0
package/dist/_vendor/ailf-tasks/schemas.js +176 -0
package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
package/dist/_vendor/ailf-tasks/validation.js +162 -0
package/dist/adapters/api-client/api-client.d.ts +75 -0
package/dist/adapters/api-client/api-client.js +201 -0
package/dist/adapters/api-client/build-request.d.ts +75 -0
package/dist/adapters/api-client/build-request.js +176 -0
package/dist/adapters/api-client/errors.d.ts +43 -0
package/dist/adapters/api-client/errors.js +68 -0
package/dist/adapters/api-client/format-error.d.ts +22 -0
package/dist/adapters/api-client/format-error.js +48 -0
package/dist/adapters/api-client/index.d.ts +13 -0
package/dist/adapters/api-client/index.js +12 -0
package/dist/adapters/api-client/progress.d.ts +26 -0
package/dist/adapters/api-client/progress.js +69 -0
package/dist/adapters/api-client/remediation.d.ts +19 -0
package/dist/adapters/api-client/remediation.js +76 -0
package/dist/adapters/api-client/types.d.ts +98 -0
package/dist/adapters/api-client/types.js +14 -0
package/dist/adapters/config-sources/file-config-adapter.js +2 -0
package/dist/adapters/task-sources/repo-schemas.d.ts +16 -181
package/dist/adapters/task-sources/repo-schemas.js +27 -184
package/dist/adapters/task-sources/repo-validation.d.ts +5 -46
package/dist/adapters/task-sources/repo-validation.js +5 -161
package/dist/commands/calculate-scores.js +2 -0
package/dist/commands/explain-handler.js +6 -0
package/dist/commands/fetch-docs.js +2 -0
package/dist/commands/generate-configs.js +2 -0
package/dist/commands/init.js +9 -9
package/dist/commands/pipeline-action.d.ts +3 -0
package/dist/commands/pipeline-action.js +13 -0
package/dist/commands/pipeline.d.ts +2 -0
package/dist/commands/pipeline.js +2 -0
package/dist/commands/pr-comment.js +2 -0
package/dist/commands/publish.js +2 -0
package/dist/commands/remote-pipeline.d.ts +27 -0
package/dist/commands/remote-pipeline.js +133 -0
package/dist/commands/remote-results.d.ts +33 -0
package/dist/commands/remote-results.js +97 -0
package/dist/orchestration/build-app-context.js +3 -0
package/dist/pipeline/map-request-to-config.js +2 -0
package/package.json +2 -1

package/dist/adapters/task-sources/repo-validation.js CHANGED Viewed

@@ -1,164 +1,8 @@
 /**
- * Semantic validation for repo-based tasks.
+ * repo-validation.ts — Re-exports semantic validation from @sanity/ailf-tasks.
  *
- * Checks that go beyond Zod schema parsing:
- * - Assertion types are in the curated set
- * - Rubric template names resolve to known templates
- * - Feature area strings are well-formed
- * - Canonical doc slugs look reasonable (slugs, not URLs)
- *
- * These produce warnings, not errors — the pipeline can still run
- * with imperfect tasks. Only structural failures (caught by Zod) block.
- *
- * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
- */
-import { CURATED_ASSERTION_TYPES, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
-// ---------------------------------------------------------------------------
-// Public API
-// ---------------------------------------------------------------------------
-/**
- * Run semantic validation on an array of parsed repo tasks.
- *
- * Returns warnings for issues that don't block execution (unknown feature
- * areas, unresolved slugs) and errors for issues that would cause pipeline
- * failures (completely missing required fields — though Zod catches most).
- */
-export function validateRepoTasks(tasks) {
-    const errors = [];
-    const warnings = [];
-    // Check for duplicate IDs
-    const seenIds = new Set();
-    for (const task of tasks) {
-        if (seenIds.has(task.id)) {
-            errors.push({
-                taskId: task.id,
-                field: "id",
-                message: `Duplicate task ID "${task.id}"`,
-            });
-        }
-        seenIds.add(task.id);
-    }
-    for (const task of tasks) {
-        // Check assertion types
-        for (let i = 0; i < task.assert.length; i++) {
-            const assertion = task.assert[i];
-            if (!CURATED_ASSERTION_TYPES.includes(assertion.type)) {
-                warnings.push({
-                    taskId: task.id,
-                    field: `assert[${i}].type`,
-                    message: `Unknown assertion type "${assertion.type}". ` +
-                        `Valid types: ${CURATED_ASSERTION_TYPES.join(", ")}`,
-                });
-            }
-            // Check rubric template for llm-rubric assertions
-            if (assertion.type === "llm-rubric" && "template" in assertion) {
-                const template = assertion.template;
-                if (!RUBRIC_TEMPLATE_NAMES.includes(template)) {
-                    warnings.push({
-                        taskId: task.id,
-                        field: `assert[${i}].template`,
-                        message: `Unknown rubric template "${template}". ` +
-                            `Valid templates: ${RUBRIC_TEMPLATE_NAMES.join(", ")}`,
-                    });
-                }
-            }
-        }
-        // Check canonical doc refs look reasonable
-        for (let i = 0; i < (task.canonicalDocs?.length ?? 0); i++) {
-            const doc = task.canonicalDocs[i];
-            // Slug refs: warn if they look like URLs or paths
-            if ("slug" in doc && !("id" in doc) && typeof doc.slug === "string") {
-                if (doc.slug.includes("/") || doc.slug.includes("http")) {
-                    warnings.push({
-                        taskId: task.id,
-                        field: `canonicalDocs[${i}].slug`,
-                        message: `Slug "${doc.slug}" looks like a URL or path — use 'path' type for paths or 'slug' for document slugs (e.g., "groq-introduction")`,
-                    });
-                }
-            }
-        }
-        // Check task has at least one llm-rubric assertion (recommended but not required)
-        const hasLlmRubric = task.assert.some((a) => a.type === "llm-rubric");
-        if (!hasLlmRubric) {
-            warnings.push({
-                taskId: task.id,
-                field: "assert",
-                message: "No llm-rubric assertion found. Tasks should have at least one scored rubric for meaningful evaluation.",
-            });
-        }
-        // Check taskPrompt exists in vars (vars.task)
-        if (!task.vars?.task) {
-            warnings.push({
-                taskId: task.id,
-                field: "vars.task",
-                message: "No task prompt found in vars.task. The LLM will receive an empty implementation request.",
-            });
-        }
-    }
-    return {
-        valid: errors.length === 0,
-        errors,
-        warnings,
-    };
-}
-/**
- * Format validation results for console output.
- */
-export function formatValidationResult(result) {
-    const lines = [];
-    if (result.errors.length > 0) {
-        lines.push("❌ Errors:");
-        for (const e of result.errors) {
-            lines.push(`  [${e.taskId}] ${e.field}: ${e.message}`);
-        }
-    }
-    if (result.warnings.length > 0) {
-        lines.push("⚠️  Warnings:");
-        for (const w of result.warnings) {
-            lines.push(`  [${w.taskId}] ${w.field}: ${w.message}`);
-        }
-    }
-    if (result.valid && result.warnings.length === 0) {
-        lines.push("✅ All repo tasks pass validation");
-    }
-    return lines.join("\n");
-}
-// ---------------------------------------------------------------------------
-// Snake_case detection (pre-parse helper)
-// ---------------------------------------------------------------------------
-/** Known snake_case → camelCase field mappings for common errors */
-const SNAKE_TO_CAMEL = {
-    feature_area: "featureArea",
-    canonical_docs: "canonicalDocs",
-    doc_coverage: "docCoverage",
-    reference_solution: "referenceSolution",
-};
-/**
- * Detect snake_case field names in raw task YAML data.
- *
- * This runs BEFORE Zod parsing to provide a user-friendly error message
- * when authors use framework-internal snake_case names instead of the
- * camelCase names expected in repo task files.
- *
- * @param raw - Raw parsed YAML (before Zod validation)
- * @param filename - Source filename for error messages
- * @returns Array of warning messages (empty if no issues)
+ * The validation logic is the single source of truth in @sanity/ailf-tasks.
+ * This file re-exports so existing eval-package importers don't need
+ * to change their import paths.
  */
-export function detectSnakeCaseFields(raw, filename) {
-    const warnings = [];
-    if (!Array.isArray(raw))
-        return warnings;
-    for (let i = 0; i < raw.length; i++) {
-        const entry = raw[i];
-        if (typeof entry !== "object" || entry === null)
-            continue;
-        const obj = entry;
-        const taskId = typeof obj.id === "string" ? obj.id : `task[${i}]`;
-        for (const [snake, camel] of Object.entries(SNAKE_TO_CAMEL)) {
-            if (snake in obj) {
-                warnings.push(`[${filename}] ${taskId}: Found "${snake}" — repo tasks use camelCase. Did you mean "${camel}"?`);
-            }
-        }
-    }
-    return warnings;
-}
+export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, } from "../../_vendor/ailf-tasks/index.js";

package/dist/commands/calculate-scores.js CHANGED Viewed

@@ -32,6 +32,8 @@ export function createCalculateScoresCommand() {
                 noRemoteCache: true,
                 searchMode: "open",
                 source: opts.source,
+                remote: false,
+                apiUrl: "https://ailf-api.sanity.build",
             });
             calculateAndWriteScores({
                 resultsPath,

package/dist/commands/explain-handler.js CHANGED Viewed

@@ -691,6 +691,12 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
         threshold: raw.threshold,
         url: raw.url ?? [],
         urls: raw.urls ?? [],
+        remote: raw.remote ?? false,
+        apiUrl: raw.apiUrl,
+        repoTasksPath: raw.repoTasksPath,
+        taskSource: raw.taskSource,
+        remoteCache: raw.remoteCache,
+        config: raw.config,
     };
     const resolved = computeResolvedOptions(withDefaults);
     const planOpts = {

package/dist/commands/fetch-docs.js CHANGED Viewed

@@ -53,6 +53,8 @@ async function executeFetchDocs(opts) {
         noRemoteCache: true,
         searchMode: "open",
         source: opts.source,
+        remote: false,
+        apiUrl: "https://ailf-api.sanity.build",
     });
     // Resolve source
     const overrides = configToSourceOverrides(ctx.config);

package/dist/commands/generate-configs.js CHANGED Viewed

@@ -31,6 +31,8 @@ export function createGenerateConfigsCommand() {
                 noRemoteCache: true,
                 searchMode: "open",
                 source: opts.source,
+                remote: false,
+                apiUrl: "https://ailf-api.sanity.build",
             });
             generateConfigs({
                 rootDir: ctx.config.rootDir,

package/dist/commands/init.js CHANGED Viewed

@@ -153,15 +153,15 @@ async function runInit(opts) {
     console.log();
     console.log("  Next steps:");
     console.log();
-    console.log(`  1. Customize the example tasks in ${rel(targetDir, tasksDir)}/`);
-    console.log("  2. Validate: npx @sanity/ailf validate-tasks .ailf/tasks/");
-    console.log("  3. Set AILF_API_KEY in your environment (e.g. in a local .env file)");
-    console.log("     and add it as a GitHub Actions secret (Settings → Secrets)");
-    console.log("  4. Push — the workflow at .github/workflows/ailf-eval.yml handles the rest");
+    console.log(`  1. Edit the example tasks in ${rel(targetDir, tasksDir)}/ — update`);
+    console.log("     slugs and prompts for your documentation");
+    console.log("  2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/");
+    console.log("  3. Add AILF_API_KEY as a GitHub Actions secret");
+    console.log("     (Settings → Secrets and variables → Actions)");
+    console.log("  4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
+    console.log("     automatically on PRs");
     console.log();
-    console.log("  💡 Get an API key with the 1Password CLI:");
-    console.log();
-    console.log("     brew install 1password-cli   # if not already installed");
-    console.log('     op read "op://Shared/AI Literacy Framework - Shared API Tokens/AILF_API_KEY_DEV"');
+    console.log("  💡 Test locally before pushing:");
+    console.log("     AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
     console.log();
 }

package/dist/commands/pipeline-action.d.ts CHANGED Viewed

@@ -48,10 +48,13 @@ export interface ResolvedOptions {
     skipFetch: boolean;
     source?: string;
     studioOriginOverride?: string;
+    remote: boolean;
     repoTasksPath?: string;
     taskOption?: string;
     taskSourceType?: "content-lake" | "yaml";
     urlArgs: string[];
+    apiUrl: string;
+    apiKey?: string;
 }
 /**
  * Pure option resolution — computes ResolvedOptions from CLI flags without

package/dist/commands/pipeline-action.js CHANGED Viewed

@@ -182,8 +182,14 @@ export function computeResolvedOptions(opts) {
         process.env.AILF_REPORT_PROJECT_ID ??
         repoConfig?.reportStore?.projectId ??
         undefined;
+    // Remote mode
+    const remote = opts.remote || process.env.AILF_REMOTE === "1";
+    const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
+    const apiKey = process.env.AILF_API_KEY ?? undefined;
     return {
         allowedOriginArgs,
+        apiKey,
+        apiUrl,
         areaOption,
         beforeOption,
         changedDocsOption,
@@ -209,6 +215,7 @@ export function computeResolvedOptions(opts) {
         publishEnabled,
         publishTag: opts.publishTag,
         readinessEnabled: opts.readiness,
+        remote,
         reportDataset,
         reportProjectId,
         sanityDocumentArgs,
@@ -270,6 +277,12 @@ export async function executePipeline(cliOpts) {
         process.exit(result.success ? 0 : 1);
     }
     const o = resolveOptions(cliOpts);
+    // Remote mode — submit to AILF API instead of running locally
+    if (o.remote) {
+        const { runRemotePipeline } = await import("./remote-pipeline.js");
+        await runRemotePipeline(o, ROOT);
+        return;
+    }
     // Dry-run: validate only, don't execute steps
     if (o.dryRun) {
         const { validateConfiguration } = await import("../pipeline/validate.js");

package/dist/commands/pipeline.d.ts CHANGED Viewed

@@ -52,11 +52,13 @@ export interface PipelineCliOptions {
     skipEval: boolean;
     skipFetch: boolean;
     source?: string;
+    remote: boolean;
     repoTasksPath?: string;
     task?: string;
     taskSource?: string;
     threshold?: number;
     url: string[];
     urls: string[];
+    apiUrl?: string;
 }
 export declare function createPipelineCommand(): Command;

package/dist/commands/pipeline.js CHANGED Viewed

@@ -41,6 +41,8 @@ export function createPipelineCommand() {
         .option("--promptfoo-url <url>", "Promptfoo share URL for report")
         .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), yaml (tasks/*.yaml files, legacy)", "content-lake")
         .option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
+        .option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
+        .option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
         .action(async (opts) => {
         const { executePipeline } = await import("./pipeline-action.js");
         await executePipeline(opts);

package/dist/commands/pr-comment.js CHANGED Viewed

@@ -31,6 +31,8 @@ export function createPrCommentCommand() {
                 noCache: true,
                 noRemoteCache: true,
                 searchMode: "open",
+                remote: false,
+                apiUrl: "https://ailf-api.sanity.build",
             });
             generatePrComment({
                 outputPath: opts.output,

package/dist/commands/publish.js CHANGED Viewed

@@ -92,6 +92,8 @@ async function runPublishCommand(summaryPath, opts) {
         searchMode: "open",
         skipEval: true,
         skipFetch: true,
+        remote: false,
+        apiUrl: "https://ailf-api.sanity.build",
     });
     const store = ctx.reportStore;
     const sinks = (ctx.sinks ?? []);

package/dist/commands/remote-pipeline.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+/**
+ * remote-pipeline.ts — Remote execution flow for `ailf pipeline --remote`.
+ *
+ * Reads local `.ailf/tasks/` YAML, validates locally via Zod schemas,
+ * submits to the AILF API, polls for completion, and writes the same
+ * output artifacts as local mode (score-summary.json, report.md,
+ * job-metadata.json).
+ *
+ * This module is the CLI-side counterpart of the API's POST /v1/pipeline
+ * endpoint. The CLI and API are published from the same monorepo, so the
+ * request/response shapes are always in sync.
+ *
+ * @see packages/eval/src/adapters/api-client/ — HTTP client + request builder
+ * @see docs/design-docs/cli-as-api-client.md — design doc
+ */
+import type { ResolvedOptions } from "./pipeline-action.js";
+/**
+ * Run the evaluation pipeline in remote mode.
+ *
+ * 1. Validate we have an API key
+ * 2. Find and validate local tasks (fail-fast with Zod errors)
+ * 3. Build the PipelineRequest payload
+ * 4. Submit to the AILF API
+ * 5. Poll for completion with progress display
+ * 6. Write output artifacts (score-summary.json, report.md, job-metadata.json)
+ */
+export declare function runRemotePipeline(opts: ResolvedOptions, rootDir: string): Promise<void>;

package/dist/commands/remote-pipeline.js ADDED Viewed

@@ -0,0 +1,133 @@
+/**
+ * remote-pipeline.ts — Remote execution flow for `ailf pipeline --remote`.
+ *
+ * Reads local `.ailf/tasks/` YAML, validates locally via Zod schemas,
+ * submits to the AILF API, polls for completion, and writes the same
+ * output artifacts as local mode (score-summary.json, report.md,
+ * job-metadata.json).
+ *
+ * This module is the CLI-side counterpart of the API's POST /v1/pipeline
+ * endpoint. The CLI and API are published from the same monorepo, so the
+ * request/response shapes are always in sync.
+ *
+ * @see packages/eval/src/adapters/api-client/ — HTTP client + request builder
+ * @see docs/design-docs/cli-as-api-client.md — design doc
+ */
+import { ZodError } from "zod";
+import { ApiClient, buildRemoteRequest, createProgressDisplay, formatJobError, resolveTasksDir, } from "../adapters/api-client/index.js";
+import { writeRemoteResults } from "./remote-results.js";
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+/**
+ * Run the evaluation pipeline in remote mode.
+ *
+ * 1. Validate we have an API key
+ * 2. Find and validate local tasks (fail-fast with Zod errors)
+ * 3. Build the PipelineRequest payload
+ * 4. Submit to the AILF API
+ * 5. Poll for completion with progress display
+ * 6. Write output artifacts (score-summary.json, report.md, job-metadata.json)
+ */
+export async function runRemotePipeline(opts, rootDir) {
+    // 1. Validate API key
+    if (!opts.apiKey) {
+        console.error("❌ AILF_API_KEY is required for remote evaluation.");
+        console.error("");
+        console.error("   Set it in your environment:");
+        console.error("   export AILF_API_KEY=ailf_live_sk_...");
+        process.exit(2);
+    }
+    const client = new ApiClient({
+        apiKey: opts.apiKey,
+        baseUrl: opts.apiUrl,
+    });
+    // 2. Find tasks directory
+    const tasksDir = resolveTasksDir(rootDir, opts.repoTasksPath);
+    // 3. Build request from local state (validates locally first)
+    const configSlice = toConfigSlice(opts);
+    let request;
+    let taskCount;
+    try {
+        const result = await buildRemoteRequest({
+            tasksDir,
+            config: configSlice,
+        });
+        request = result.request;
+        taskCount = result.taskCount;
+    }
+    catch (err) {
+        if (err instanceof ZodError) {
+            console.error("❌ Task validation failed:\n");
+            for (const issue of err.issues) {
+                console.error(`   ${issue.path.join(".")}: ${issue.message}`);
+            }
+            console.error("");
+            console.error("💡 Fix the issues above in your .ailf/tasks/ YAML files.");
+            process.exit(2);
+        }
+        throw err;
+    }
+    console.log(`📦 Found ${taskCount} task(s) in ${tasksDir}`);
+    // 4. Submit to API
+    const submission = await client.submitPipeline(request);
+    console.log(`🚀 Submitted job: ${submission.jobId}`);
+    if (submission.estimatedDurationMs) {
+        const mins = Math.ceil(submission.estimatedDurationMs / 60_000);
+        console.log(`⏱️  Estimated duration: ~${mins} minute(s)`);
+    }
+    // 5. Poll for completion with progress display
+    const job = await client.waitForCompletion(submission.jobId, {
+        onProgress: createProgressDisplay(),
+    });
+    // 6. Handle result
+    if (job.status !== "completed") {
+        console.error("");
+        console.error(formatJobError(job));
+        process.exit(1);
+    }
+    // 7. Fetch and write output artifacts
+    await writeRemoteResults(client, job, {
+        rootDir,
+        outputPath: opts.outputPath,
+        apiUrl: opts.apiUrl,
+    });
+}
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+/**
+ * Extract the subset of ResolvedOptions needed for building a remote request.
+ * This mapping keeps remote-pipeline.ts decoupled from the full ResolvedConfig.
+ */
+function toConfigSlice(opts) {
+    return {
+        mode: opts.mode,
+        debug: opts.debug,
+        areas: opts.areaOption
+            ?.split(",")
+            .map((s) => s.trim())
+            .filter(Boolean),
+        tasks: opts.taskOption
+            ?.split(",")
+            .map((s) => s.trim())
+            .filter(Boolean),
+        changedDocs: opts.changedDocsOption
+            ?.split(",")
+            .map((s) => s.trim())
+            .filter(Boolean),
+        source: opts.source,
+        compareEnabled: opts.compareEnabled,
+        compareThreshold: opts.compareThreshold,
+        publishEnabled: opts.publishEnabled,
+        publishTag: opts.publishTag,
+        concurrency: opts.concurrency,
+        datasetOverride: opts.datasetOverride,
+        projectIdOverride: opts.projectIdOverride,
+        perspectiveOverride: opts.perspectiveOverride,
+        graderReplications: opts.graderReplications,
+        gapAnalysisEnabled: opts.gapAnalysisEnabled,
+        readinessEnabled: opts.readinessEnabled,
+        discoveryReportEnabled: opts.discoveryReportEnabled,
+    };
+}

package/dist/commands/remote-results.d.ts ADDED Viewed

@@ -0,0 +1,33 @@
+/**
+ * remote-results.ts — Write output artifacts from a remote evaluation.
+ *
+ * Produces the same file layout as local mode so downstream tools
+ * (workflow PR comments, score comparison, baseline save) work unchanged:
+ *
+ *   results/latest/score-summary.json  — scores by area + overall
+ *   results/latest/report.md           — rendered markdown report
+ *   results/latest/job-metadata.json   — job ID, timing, API URL
+ *
+ * @see packages/eval/src/commands/remote-pipeline.ts — caller
+ */
+import type { ApiClient } from "../adapters/api-client/api-client.js";
+import type { JobResponse } from "../adapters/api-client/types.js";
+/** Options for writing remote results. */
+export interface WriteResultsOptions {
+    /** Eval package root directory (for results/latest/ path). */
+    rootDir: string;
+    /** Optional output path override (--output flag). */
+    outputPath?: string;
+    /** API base URL (for metadata). */
+    apiUrl: string;
+}
+/**
+ * Fetch report artifacts from the API and write them to disk.
+ *
+ * Writes:
+ * - `results/latest/score-summary.json` — score data from job response
+ * - `results/latest/report.md` — full markdown report (if reportId present)
+ * - `results/latest/job-metadata.json` — job tracking info
+ * - `--output` path — markdown report (if specified)
+ */
+export declare function writeRemoteResults(client: ApiClient, job: JobResponse, options: WriteResultsOptions): Promise<void>;

package/dist/commands/remote-results.js ADDED Viewed

@@ -0,0 +1,97 @@
+/**
+ * remote-results.ts — Write output artifacts from a remote evaluation.
+ *
+ * Produces the same file layout as local mode so downstream tools
+ * (workflow PR comments, score comparison, baseline save) work unchanged:
+ *
+ *   results/latest/score-summary.json  — scores by area + overall
+ *   results/latest/report.md           — rendered markdown report
+ *   results/latest/job-metadata.json   — job ID, timing, API URL
+ *
+ * @see packages/eval/src/commands/remote-pipeline.ts — caller
+ */
+import { mkdirSync, writeFileSync } from "fs";
+import { resolve } from "path";
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+/**
+ * Fetch report artifacts from the API and write them to disk.
+ *
+ * Writes:
+ * - `results/latest/score-summary.json` — score data from job response
+ * - `results/latest/report.md` — full markdown report (if reportId present)
+ * - `results/latest/job-metadata.json` — job tracking info
+ * - `--output` path — markdown report (if specified)
+ */
+export async function writeRemoteResults(client, job, options) {
+    const resultsDir = resolve(options.rootDir, "results", "latest");
+    mkdirSync(resultsDir, { recursive: true });
+    // 1. Write score summary
+    const scoreSummary = buildScoreSummary(job);
+    writeFileSync(resolve(resultsDir, "score-summary.json"), JSON.stringify(scoreSummary, null, 2));
+    // 2. Fetch and write markdown report
+    let reportWritten = false;
+    if (job.reportId) {
+        try {
+            const markdown = await client.getReportMarkdown(job.reportId);
+            writeFileSync(resolve(resultsDir, "report.md"), markdown);
+            reportWritten = true;
+            // Also write to --output path if specified
+            if (options.outputPath) {
+                writeFileSync(options.outputPath, markdown);
+            }
+        }
+        catch (err) {
+            console.warn(`  ⚠️  Could not fetch report: ${err instanceof Error ? err.message : String(err)}`);
+        }
+    }
+    // 3. Write job metadata
+    writeFileSync(resolve(resultsDir, "job-metadata.json"), JSON.stringify({
+        jobId: job.jobId,
+        status: job.status,
+        startedAt: job.startedAt ?? null,
+        completedAt: job.completedAt ?? null,
+        reportId: job.reportId ?? null,
+        reportUrl: job.reportUrl ?? null,
+        execution: job.execution ?? null,
+        apiUrl: options.apiUrl,
+    }, null, 2));
+    // 4. Print summary
+    console.log("");
+    console.log(`✅ Evaluation completed`);
+    console.log(`   📊 Results: ${resolve(resultsDir, "score-summary.json")}`);
+    if (reportWritten) {
+        console.log(`   📝 Report:  ${resolve(resultsDir, "report.md")}`);
+    }
+    if (options.outputPath && reportWritten) {
+        console.log(`   📄 Output:  ${options.outputPath}`);
+    }
+    if (job.reportUrl) {
+        console.log(`   🔗 Studio:  ${job.reportUrl}`);
+    }
+    console.log(`   🏷️  Job ID:  ${job.jobId}`);
+}
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+/**
+ * Build a score summary object from the job response.
+ *
+ * The job response may contain a full scoreSummary (if the API includes it)
+ * or just minimal data. We build a structure that's compatible with the
+ * local pipeline's score-summary.json format.
+ */
+function buildScoreSummary(job) {
+    // The job response from the API may include inline score data in the
+    // future. For now, we store what we have — the job metadata — so
+    // downstream tools can at least read the file and know a remote eval
+    // completed.
+    return {
+        _remote: true,
+        jobId: job.jobId,
+        status: job.status,
+        reportId: job.reportId ?? null,
+        completedAt: job.completedAt ?? null,
+    };
+}

package/dist/orchestration/build-app-context.js CHANGED Viewed

@@ -69,6 +69,9 @@ export function mapToResolvedConfig(opts, rootDir) {
         repoTasksPath: opts.repoTasksPath,
         reportStoreProjectId: opts.reportProjectId,
         reportStoreDataset: opts.reportDataset,
+        remote: opts.remote ?? false,
+        apiUrl: opts.apiUrl ?? "https://ailf-api.sanity.build",
+        apiKey: opts.apiKey,
     };
 }
 /**

package/dist/pipeline/map-request-to-config.js CHANGED Viewed

@@ -58,6 +58,8 @@ export function mapRequestToConfig(request, rootDir) {
         repoTasksPath: undefined,
         callback: request.callback,
         jobId: request.jobId,
+        remote: false,
+        apiUrl: "https://ailf-api.sanity.build",
     };
 }
 function mapDebug(debug) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "0.1.5",
+  "version": "0.1.7",
   "private": false,
   "publishConfig": {
     "access": "restricted"
@@ -64,6 +64,7 @@
   "devDependencies": {
     "@sanity/ailf-core": "workspace:*",
     "@sanity/ailf-shared": "workspace:*",
+    "@sanity/ailf-tasks": "workspace:*",
     "@types/js-yaml": "^4.0.9",
     "@types/node": "^22.13.1",
     "tsx": "^4.19.2",