npm - @sanity/ailf - Versions diffs - 3.1.1 → 3.3.0 - Mend

@sanity/ailf 3.1.1 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

package/dist/_vendor/ailf-core/artifact-registry.d.ts +1 -1
package/dist/_vendor/ailf-core/artifact-registry.js +188 -9
package/dist/_vendor/ailf-core/examples/index.d.ts +8 -8
package/dist/_vendor/ailf-core/examples/index.js +8 -8
package/dist/_vendor/ailf-core/types/branded-ids.d.ts +8 -6
package/dist/_vendor/ailf-core/types/branded-ids.js +35 -24
package/dist/_vendor/ailf-core/types/index.d.ts +11 -4
package/dist/_vendor/ailf-core/types/scoring-input.d.ts +6 -0
package/dist/artifact-capture/api-gateway-artifact-writer.js +5 -3
package/dist/artifact-capture/batching-api-gateway-artifact-writer.js +11 -7
package/dist/artifact-capture/fanout-artifact-writer.d.ts +9 -6
package/dist/artifact-capture/fanout-artifact-writer.js +9 -6
package/dist/artifact-capture/gcs-artifact-writer.js +17 -22
package/dist/artifact-capture/prepare-upload-body.d.ts +27 -0
package/dist/artifact-capture/prepare-upload-body.js +36 -0
package/dist/commands/init.d.ts +1 -1
package/dist/commands/init.js +3 -3
package/dist/commands/pipeline-action.js +51 -6
package/dist/commands/pipeline.js +1 -1
package/dist/commands/validate-tasks.d.ts +14 -3
package/dist/commands/validate-tasks.js +125 -81
package/dist/composition-root.js +7 -2
package/dist/index.d.ts +2 -0
package/dist/index.js +4 -0
package/dist/orchestration/pipeline-orchestrator.js +34 -5
package/dist/pipeline/calculate-scores.d.ts +6 -0
package/dist/pipeline/calculate-scores.js +2 -0
package/package.json +1 -1

package/dist/artifact-capture/prepare-upload-body.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+/**
+ * Shared preamble for uploading an artifact payload from an `ArtifactWriter`.
+ *
+ * All three `ArtifactWriter` implementations (direct GCS, API Gateway, batching
+ * API Gateway) must apply the same `redact → serialize → bytecount` pipeline so
+ * secrets are stripped before leaving the process. Routing each writer through
+ * this helper prevents drift — any future writer that skips the helper would
+ * fail the contract test in
+ * `src/__tests__/artifact-upload-redaction.test.ts`.
+ *
+ * NDJSON streaming is **not** handled here — each row is redacted independently
+ * by the NDJSON writer path before being concatenated into a part body.
+ */
+import type { ArtifactMime } from "../_vendor/ailf-core/index.d.ts";
+export interface PreparedUploadBody {
+    readonly body: string;
+    readonly bytes: number;
+}
+/**
+ * Redact, serialize, and size `payload` for upload.
+ *
+ * Serialization branches on `mime`:
+ * - `application/json` (and anything else JSON-shaped, including the
+ *   single-shot side of `application/x-ndjson`) → `JSON.stringify`.
+ * - `text/markdown` / `application/yaml` → coerce to string via `String()`.
+ */
+export declare function prepareUploadBody(payload: unknown, mime: ArtifactMime): PreparedUploadBody;

package/dist/artifact-capture/prepare-upload-body.js ADDED Viewed

@@ -0,0 +1,36 @@
+/**
+ * Shared preamble for uploading an artifact payload from an `ArtifactWriter`.
+ *
+ * All three `ArtifactWriter` implementations (direct GCS, API Gateway, batching
+ * API Gateway) must apply the same `redact → serialize → bytecount` pipeline so
+ * secrets are stripped before leaving the process. Routing each writer through
+ * this helper prevents drift — any future writer that skips the helper would
+ * fail the contract test in
+ * `src/__tests__/artifact-upload-redaction.test.ts`.
+ *
+ * NDJSON streaming is **not** handled here — each row is redacted independently
+ * by the NDJSON writer path before being concatenated into a part body.
+ */
+import { redactArtifactData } from "./redact-artifact.js";
+/**
+ * Redact, serialize, and size `payload` for upload.
+ *
+ * Serialization branches on `mime`:
+ * - `application/json` (and anything else JSON-shaped, including the
+ *   single-shot side of `application/x-ndjson`) → `JSON.stringify`.
+ * - `text/markdown` / `application/yaml` → coerce to string via `String()`.
+ */
+export function prepareUploadBody(payload, mime) {
+    const redacted = redactArtifactData(payload);
+    const body = serializeForMime(redacted, mime);
+    const bytes = Buffer.byteLength(body, "utf-8");
+    return { body, bytes };
+}
+function serializeForMime(payload, mime) {
+    if (mime === "text/markdown" || mime === "application/yaml") {
+        if (typeof payload === "string")
+            return payload;
+        return String(payload ?? "");
+    }
+    return JSON.stringify(payload);
+}

package/dist/commands/init.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * task files. The generated files are ready-to-edit starting points —
  * not live evaluation tasks.
  *
- * TypeScript output (default) uses define* helpers from @sanity/ailf-core
+ * TypeScript output (default) uses define* helpers from @sanity/ailf
  * for full IDE autocomplete and type checking. YAML output serializes the
  * parsed task data. JSON output is a plain serialization of the parsed data.
  *

package/dist/commands/init.js CHANGED Viewed

@@ -5,7 +5,7 @@
  * task files. The generated files are ready-to-edit starting points —
  * not live evaluation tasks.
  *
- * TypeScript output (default) uses define* helpers from @sanity/ailf-core
+ * TypeScript output (default) uses define* helpers from @sanity/ailf
  * for full IDE autocomplete and type checking. YAML output serializes the
  * parsed task data. JSON output is a plain serialization of the parsed data.
  *
@@ -258,7 +258,7 @@ async function runInit(opts) {
     if (format === "ts") {
         console.log();
         console.log(`  💡 TypeScript tasks (${taskExt}) give you full IDE autocomplete`);
-        console.log("     via defineTask() from @sanity/ailf-core.");
+        console.log("     via defineTask() from @sanity/ailf.");
     }
     console.log();
     console.log("  🔑 Retrieve the API key from 1Password (Sanity employees):");
@@ -289,7 +289,7 @@ const CUSTOM_PRESET_TS = `/**
  * @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/presets.md
  */
-import { definePreset } from "../_vendor/ailf-core/index.js"
+import { definePreset } from "@sanity/ailf"
 export default definePreset({
   name: "my-docs-evaluation",

package/dist/commands/pipeline-action.js CHANGED Viewed

@@ -191,12 +191,18 @@ export function computeResolvedOptions(opts) {
         // Smart default: full runs auto-publish when store is configured
         publishEnabled = reportStoreConfigured && !debugEnabled;
     }
+    // Resolve task source + repo tasks path before anything that depends on
+    // them (report store overrides, output dir). When --task-source=repo is
+    // set without --repo-tasks-path, default to ./.ailf/tasks/ — the location
+    // created by `ailf init`.
+    const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
+    const resolvedRepoTasksPath = resolveRepoTasksPath(callerCwd, opts.repoTasksPath, resolvedTaskSourceType);
     // Report store overrides — resolution order:
     //   1. Explicit CLI flags (--report-dataset, --report-project)
     //   2. Environment variables (AILF_REPORT_DATASET, AILF_REPORT_PROJECT_ID)
-    //   3. .ailf/config.yaml reportStore block (when --repo-tasks-path is set)
+    //   3. .ailf/config.yaml reportStore block (when repo tasks path is set)
     //   4. Eval dataset override (so perspective evals publish to the same dataset)
-    const repoConfig = loadRepoConfigIfPresent(opts.repoTasksPath);
+    const repoConfig = loadRepoConfigIfPresent(resolvedRepoTasksPath);
     const reportDataset = opts.reportDataset ??
         process.env.AILF_REPORT_DATASET ??
         repoConfig?.reportStore?.dataset ??
@@ -211,10 +217,6 @@ export function computeResolvedOptions(opts) {
     const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
     const apiKey = process.env.AILF_API_KEY ?? undefined;
     // Output directory: explicit --output-dir → $CWD/.ailf/results/latest/
-    const resolvedRepoTasksPath = opts.repoTasksPath
-        ? resolve(callerCwd, opts.repoTasksPath)
-        : undefined;
-    const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
     const outputDir = resolveOutputDir(opts.outputDir);
     return {
         allowedOriginArgs,
@@ -299,6 +301,39 @@ function resolveTaskSourceType(raw) {
     console.error(`❌ Invalid --task-source "${raw}". Must be "repo" or "content-lake".`);
     process.exit(1);
 }
+/**
+ * Resolve the repo tasks path.
+ *
+ * - Explicit `--repo-tasks-path` wins (resolved relative to callerCwd).
+ * - When `--task-source=repo` is set without a path, defaults to
+ *   `./.ailf/tasks/` in callerCwd — the location created by `ailf init`.
+ * - Otherwise returns undefined (Content Lake source).
+ *
+ * Exits with a helpful error when an explicit path doesn't exist, or when
+ * the repo source was requested but no tasks directory can be found.
+ */
+function resolveRepoTasksPath(callerCwd, explicitPath, taskSourceType) {
+    if (explicitPath) {
+        const abs = resolve(callerCwd, explicitPath);
+        if (!existsSync(abs)) {
+            console.error(`❌ Repo tasks directory not found: ${abs}\n` +
+                "   Provide a valid --repo-tasks-path, or run 'ailf init' to scaffold .ailf/tasks/.");
+            process.exit(1);
+        }
+        return abs;
+    }
+    if (taskSourceType === "repo") {
+        const defaultPath = resolve(callerCwd, ".ailf", "tasks");
+        if (!existsSync(defaultPath)) {
+            console.error(`❌ --task-source=repo was set but no tasks directory was found.\n` +
+                `   Looked for: ${defaultPath}\n` +
+                "   Run 'ailf init' to scaffold .ailf/tasks/, or pass --repo-tasks-path <path>.");
+            process.exit(1);
+        }
+        return defaultPath;
+    }
+    return undefined;
+}
 // ---------------------------------------------------------------------------
 // Pipeline entry point
 // ---------------------------------------------------------------------------
@@ -330,6 +365,16 @@ export async function executePipeline(cliOpts) {
         if (cliOpts.repoTasksPath) {
             config.repoTasksPath = resolve(callerCwd, cliOpts.repoTasksPath);
         }
+        else if (config.taskSourceType === "repo" && !config.repoTasksPath) {
+            // Default: when taskSource=repo but no path set, look in .ailf/tasks/
+            // (matches the `ailf init` scaffold location). Silent fallback here —
+            // composition root will surface a helpful error if the directory is
+            // missing.
+            const defaultPath = resolve(callerCwd, ".ailf", "tasks");
+            if (existsSync(defaultPath)) {
+                config.repoTasksPath = defaultPath;
+            }
+        }
         if (cliOpts.output) {
             config.outputPath = resolve(callerCwd, cliOpts.output);
         }

package/dist/commands/pipeline.js CHANGED Viewed

@@ -51,7 +51,7 @@ export function createPipelineCommand() {
         .option("--output-dir <path>", "Base directory for pipeline output artifacts (default: inferred from execution context)")
         .option("--promptfoo-url <url>", "Promptfoo share URL for report")
         .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge)", "content-lake")
-        .option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
+        .option("--repo-tasks-path <path>", "Path to repo-based task definitions. Defaults to ./.ailf/tasks/ when --task-source=repo.")
         .option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
         .option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
         .option("--no-artifacts", "Disable all artifact writers (D0033). Overrides --artifacts-dir.")

package/dist/commands/validate-tasks.d.ts CHANGED Viewed

@@ -1,9 +1,9 @@
 /**
  * validate-tasks command — standalone validation of task files.
  *
- * Validates .ailf/tasks/*.yaml files against the CanonicalTaskSchema without
- * running the full pipeline. Useful for pre-commit hooks and CI checks
- * in external repos.
+ * Validates .ailf/tasks/*.yaml and .ailf/tasks/*.task.ts files against the
+ * CanonicalTaskSchema without running the full pipeline. Useful for
+ * pre-commit hooks and CI checks in external repos.
  *
  * Usage:
  *   ailf validate-tasks .ailf/tasks/
@@ -11,6 +11,17 @@
  *
  * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
  * @see packages/eval/src/adapters/task-sources/repo-validation.ts
+ * @see packages/eval/src/adapters/task-sources/task-file-loader.ts
  */
 import { Command } from "commander";
+export interface ValidateTasksOptions {
+    strict: boolean;
+    callerCwd?: string;
+}
 export declare function createValidateTasksCommand(): Command;
+/**
+ * Execute the validate-tasks command logic. Returns the exit code (0 success,
+ * 1 failure) so callers can decide how to surface it — the CLI wrapper calls
+ * `process.exit`, tests can assert directly.
+ */
+export declare function runValidateTasks(tasksPath: string, opts: ValidateTasksOptions): Promise<number>;

package/dist/commands/validate-tasks.js CHANGED Viewed

@@ -1,9 +1,9 @@
 /**
  * validate-tasks command — standalone validation of task files.
  *
- * Validates .ailf/tasks/*.yaml files against the CanonicalTaskSchema without
- * running the full pipeline. Useful for pre-commit hooks and CI checks
- * in external repos.
+ * Validates .ailf/tasks/*.yaml and .ailf/tasks/*.task.ts files against the
+ * CanonicalTaskSchema without running the full pipeline. Useful for
+ * pre-commit hooks and CI checks in external repos.
  *
  * Usage:
  *   ailf validate-tasks .ailf/tasks/
@@ -11,97 +11,141 @@
  *
  * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
  * @see packages/eval/src/adapters/task-sources/repo-validation.ts
+ * @see packages/eval/src/adapters/task-sources/task-file-loader.ts
  */
 import { existsSync, readdirSync, readFileSync } from "fs";
-import { resolve, relative } from "path";
+import { resolve, relative, basename } from "path";
 import { Command } from "commander";
 import { load } from "js-yaml";
 import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "../adapters/task-sources/repo-schemas.js";
 import { validateCanonicalTasks, formatValidationResult, } from "../adapters/task-sources/repo-validation.js";
+import { discoverTsTaskFiles, loadTsTaskFile, } from "../adapters/task-sources/task-file-loader.js";
 export function createValidateTasksCommand() {
     return new Command("validate-tasks")
-        .description("Validate task YAML files (.ailf/tasks/) against the canonical schema")
+        .description("Validate task files (YAML and TypeScript) in .ailf/tasks/ against the canonical schema")
         .argument("[path]", "Path to tasks directory (default: .ailf/tasks/)", ".ailf/tasks")
         .option("--strict", "Treat warnings as errors", false)
         .action(async (tasksPath, opts) => {
-        // Resolve relative to the caller's working directory, not the
-        // eval package root (which differs when run via bin/ailf.js)
-        const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
-        const resolvedPath = resolve(callerCwd, tasksPath);
-        if (!existsSync(resolvedPath)) {
-            console.error(`Directory not found: ${resolvedPath}`);
-            process.exit(1);
+        const exitCode = await runValidateTasks(tasksPath, opts);
+        process.exit(exitCode);
+    });
+}
+/**
+ * Execute the validate-tasks command logic. Returns the exit code (0 success,
+ * 1 failure) so callers can decide how to surface it — the CLI wrapper calls
+ * `process.exit`, tests can assert directly.
+ */
+export async function runValidateTasks(tasksPath, opts) {
+    // Resolve relative to the caller's working directory, not the
+    // eval package root (which differs when run via bin/ailf.js)
+    const callerCwd = opts.callerCwd ?? process.env.AILF_CALLER_CWD ?? process.cwd();
+    const resolvedPath = resolve(callerCwd, tasksPath);
+    if (!existsSync(resolvedPath)) {
+        console.error(`Directory not found: ${resolvedPath}`);
+        return 1;
+    }
+    const yamlFiles = readdirSync(resolvedPath).filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."));
+    const tsFiles = discoverTsTaskFiles(resolvedPath);
+    const fileCount = yamlFiles.length + tsFiles.length;
+    if (fileCount === 0) {
+        console.error(`No task files found in ${resolvedPath}\n` +
+            "  Expected .yaml, .yml, .task.ts, or .task.js files");
+        return 1;
+    }
+    console.log(`\nValidating ${fileCount} task file(s) in ${relative(process.cwd(), resolvedPath)}/\n`);
+    let totalTasks = 0;
+    let hasErrors = false;
+    const allTasks = [];
+    for (const file of yamlFiles) {
+        const filePath = resolve(resolvedPath, file);
+        const raw = readFileSync(filePath, "utf-8");
+        let parsed;
+        try {
+            parsed = load(raw);
         }
-        const yamlFiles = readdirSync(resolvedPath).filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."));
-        if (yamlFiles.length === 0) {
-            console.error(`No YAML files found in ${resolvedPath}`);
-            process.exit(1);
+        catch (err) {
+            const msg = err instanceof Error ? err.message : String(err);
+            console.error(`  ${file}: YAML parse error`);
+            console.error(`     ${msg}\n`);
+            hasErrors = true;
+            continue;
         }
-        console.log(`\nValidating ${yamlFiles.length} task file(s) in ${relative(process.cwd(), resolvedPath)}/\n`);
-        let totalTasks = 0;
-        let hasErrors = false;
-        const allTasks = [];
-        for (const file of yamlFiles) {
-            const filePath = resolve(resolvedPath, file);
-            const raw = readFileSync(filePath, "utf-8");
-            let parsed;
-            try {
-                parsed = load(raw);
-            }
-            catch (err) {
-                const msg = err instanceof Error ? err.message : String(err);
-                console.error(`  ${file}: YAML parse error`);
-                console.error(`     ${msg}\n`);
-                hasErrors = true;
-                continue;
-            }
-            if (!Array.isArray(parsed)) {
-                console.error(`  ${file}: Expected a YAML array of task definitions`);
-                hasErrors = true;
-                continue;
-            }
-            // Detect legacy field names before Zod validation
-            const legacyWarnings = detectLegacyFieldNames(parsed, file);
-            if (legacyWarnings.length > 0) {
-                console.error(`  ${file}: Uses legacy field names`);
-                for (const w of legacyWarnings) {
-                    console.error(`     ${w}`);
-                }
-                console.error();
-                hasErrors = true;
-                continue;
-            }
-            try {
-                const tasks = parseCanonicalTaskFile(parsed, file);
-                console.log(`  ${file}: ${tasks.length} task${tasks.length === 1 ? "" : "s"} valid`);
-                totalTasks += tasks.length;
-                allTasks.push(...tasks);
-            }
-            catch (err) {
-                const msg = err instanceof Error ? err.message : String(err);
-                console.error(`  ${file}: Schema validation failed`);
-                console.error(`${msg
-                    .split("\n")
-                    .map((l) => `     ${l}`)
-                    .join("\n")}\n`);
-                hasErrors = true;
-            }
+        if (!Array.isArray(parsed)) {
+            console.error(`  ${file}: Expected a YAML array of task definitions`);
+            hasErrors = true;
+            continue;
         }
-        // Run semantic validation on all parsed tasks
-        if (allTasks.length > 0) {
-            console.log(); // blank line
-            const semanticResult = validateCanonicalTasks(allTasks);
-            const formatted = formatValidationResult(semanticResult);
-            console.log(formatted);
-            if (!semanticResult.valid) {
-                hasErrors = true;
-            }
-            if (opts.strict && semanticResult.warnings.length > 0) {
-                hasErrors = true;
-                console.log("\n  --strict mode: warnings treated as errors");
-            }
+        if (!validateTaskArray(parsed, file, allTasks)) {
+            hasErrors = true;
+            continue;
         }
-        console.log(`\n${hasErrors ? "FAIL" : "OK"} ${totalTasks} task${totalTasks === 1 ? "" : "s"} across ${yamlFiles.length} file${yamlFiles.length === 1 ? "" : "s"}\n`);
-        process.exit(hasErrors ? 1 : 0);
-    });
+        totalTasks += parsed.length;
+    }
+    for (const tsFilePath of tsFiles) {
+        const file = basename(tsFilePath);
+        let loaded;
+        try {
+            loaded = await loadTsTaskFile(tsFilePath);
+        }
+        catch (err) {
+            const msg = err instanceof Error ? err.message : String(err);
+            console.error(`  ${file}: Failed to load TypeScript task file`);
+            console.error(`     ${msg}\n`);
+            hasErrors = true;
+            continue;
+        }
+        if (!validateTaskArray(loaded.tasks, file, allTasks)) {
+            hasErrors = true;
+            continue;
+        }
+        totalTasks += loaded.tasks.length;
+    }
+    if (allTasks.length > 0) {
+        console.log();
+        const semanticResult = validateCanonicalTasks(allTasks);
+        const formatted = formatValidationResult(semanticResult);
+        console.log(formatted);
+        if (!semanticResult.valid) {
+            hasErrors = true;
+        }
+        if (opts.strict && semanticResult.warnings.length > 0) {
+            hasErrors = true;
+            console.log("\n  --strict mode: warnings treated as errors");
+        }
+    }
+    console.log(`\n${hasErrors ? "FAIL" : "OK"} ${totalTasks} task${totalTasks === 1 ? "" : "s"} across ${fileCount} file${fileCount === 1 ? "" : "s"}\n`);
+    return hasErrors ? 1 : 0;
+}
+/**
+ * Validate an array of raw task entries — runs the legacy-field detector and
+ * the canonical Zod schema, appending valid tasks to `accumulator`.
+ *
+ * Returns `true` when the file is fully valid, `false` when any error was
+ * reported (the caller is responsible for flipping its own error flag).
+ */
+function validateTaskArray(entries, file, accumulator) {
+    const legacyWarnings = detectLegacyFieldNames(entries, file);
+    if (legacyWarnings.length > 0) {
+        console.error(`  ${file}: Uses legacy field names`);
+        for (const w of legacyWarnings) {
+            console.error(`     ${w}`);
+        }
+        console.error();
+        return false;
+    }
+    try {
+        const tasks = parseCanonicalTaskFile(entries, file);
+        console.log(`  ${file}: ${tasks.length} task${tasks.length === 1 ? "" : "s"} valid`);
+        accumulator.push(...tasks);
+        return true;
+    }
+    catch (err) {
+        const msg = err instanceof Error ? err.message : String(err);
+        console.error(`  ${file}: Schema validation failed`);
+        console.error(`${msg
+            .split("\n")
+            .map((l) => `     ${l}`)
+            .join("\n")}\n`);
+        return false;
+    }
 }

package/dist/composition-root.js CHANGED Viewed

@@ -188,14 +188,19 @@ export function createArtifactWriter(config, logger, progress) {
         exclude,
         ...(remote ? {} : { progress }),
     });
+    // W0064 — when a remote backend is wired, list it first so its ArtifactRef
+    // wins the fanout's firstNonNull() selection and the published manifest
+    // points at a cross-machine-readable store. Local stays attached as the
+    // resilience tier: if the remote leg fails, firstNonNull falls through to
+    // local and the pipeline still produces a non-null ref.
     const base = remote
-        ? new FanoutArtifactWriter([local, remote], { progress })
+        ? new FanoutArtifactWriter([remote, local], { progress })
         : local;
     if (!remote) {
         logger.debug(`Artifact writer: LocalFilesystemArtifactWriter only (rootDir=${rootDir})`);
     }
     else {
-        logger.debug(`Artifact writer: FanoutArtifactWriter([local=${rootDir}, ${remote.constructor.name}])`);
+        logger.debug(`Artifact writer: FanoutArtifactWriter([${remote.constructor.name}, local=${rootDir}])`);
     }
     // Wrap in the accumulator so FinalizeRunStep can build a populated
     // RunManifest without each producer bookkeeping its own ArtifactRefs

package/dist/index.d.ts CHANGED Viewed

@@ -39,3 +39,5 @@ export { env } from "./_vendor/ailf-core/index.d.ts";
 export type { AgentHarnessTaskDefinition, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./_vendor/ailf-core/index.d.ts";
 export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, type CanonicalTask, type CuratedAssertionType, type RubricTemplateName, } from "./adapters/task-sources/repo-schemas.js";
 export { formatValidationResult, validateCanonicalTasks, type ValidationMessage, type ValidationResult, } from "./adapters/task-sources/repo-validation.js";
+export { InMemoryPluginRegistry } from "./_vendor/ailf-core/index.d.ts";
+export type { CompilationContext, ModeBase, ModeCompileResult, ModeHandler, PresetDefinition, } from "./_vendor/ailf-core/index.d.ts";

package/dist/index.js CHANGED Viewed

@@ -46,3 +46,7 @@ export { env } from "./_vendor/ailf-core/index.js";
 // ---------------------------------------------------------------------------
 export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, } from "./adapters/task-sources/repo-schemas.js";
 export { formatValidationResult, validateCanonicalTasks, } from "./adapters/task-sources/repo-validation.js";
+// ---------------------------------------------------------------------------
+// Plugin extension points — for authoring custom presets, modes, and registries
+// ---------------------------------------------------------------------------
+export { InMemoryPluginRegistry } from "./_vendor/ailf-core/index.js";

package/dist/orchestration/pipeline-orchestrator.js CHANGED Viewed

@@ -130,6 +130,12 @@ export async function orchestratePipeline(ctx, steps) {
     const pipelineStart = Date.now();
     const hasJob = !!ctx.config.jobId;
     const jobUpdates = [];
+    // DOC-2064 — tracks whether the pre-finalize pipelineContext emit fired so
+    // the post-loop fallback can skip redundant writes. A second emit to the
+    // same GCS path produces a 412 Precondition Failed from the signed-URL
+    // writer (which enforces no-overwrite), logging spurious warnings on every
+    // successful run.
+    let pipelineContextEmitted = false;
     ctx.logger.section("ai-literacy-framework — Evaluation Pipeline");
     ctx.logger.debug(`Pipeline starting with ${steps.length} steps`, {
         steps: steps.map((s) => s.name),
@@ -152,6 +158,16 @@ export async function orchestratePipeline(ctx, steps) {
         ctx.logger.debug(`Starting step ${i + 1}/${steps.length}: ${step.name}`);
         ctx.logger.section(step.name);
         exportPhase.maybeOpen(step.name);
+        // DOC-2064 — emit pipelineContext BEFORE finalize-run so the artifact
+        // ref registers with the accumulator and lands in RunManifest.artifacts,
+        // which PublishReportStep then snapshots into Report.artifactManifest.
+        // The previous post-loop emit ran after publish and was invisible to
+        // Content Lake readers. The failure-path capture below still fires on
+        // pre-finalize aborts so aborted runs retain the on-disk artifact.
+        if (step.name === "finalize-run") {
+            await capturePipelineContext(ctx, state, results);
+            pipelineContextEmitted = true;
+        }
         // Report current step progress
         if (hasJob) {
             await reportJobProgress(ctx, step.name, i, steps.length, "running", undefined, jobUpdates);
@@ -175,8 +191,12 @@ export async function orchestratePipeline(ctx, steps) {
             }
             // Capture pipeline context before exiting. `job-updates` was an
             // observability-only capture not tied to a registered artifact type;
-            // dropped in W0050. Use the JobStore path for job telemetry.
-            await capturePipelineContext(ctx, state, results);
+            // dropped in W0050. Use the JobStore path for job telemetry. Skip
+            // when the pre-finalize emit already fired to avoid a 412 overwrite
+            // warning (DOC-2064).
+            if (!pipelineContextEmitted) {
+                await capturePipelineContext(ctx, state, results);
+            }
             exportPhase.close();
             return {
                 belowCritical: state.belowCritical,
@@ -231,9 +251,18 @@ export async function orchestratePipeline(ctx, steps) {
             ctx.logger.warn("Failed to report job completion — continuing");
         }
     }
-    // Capture pipeline context. `job-updates` observability captures were
-    // dropped in Slice 6.1 — JobStore is the supported telemetry path.
-    await capturePipelineContext(ctx, state, results);
+    // DOC-2064 — post-loop fallback. Only fires when the pre-finalize emit
+    // inside the step loop didn't run — typically because the pipeline has no
+    // finalize-run step (test harnesses, air-gapped runs). Skipping this when
+    // the pre-finalize emit already fired avoids a 412 Precondition Failed
+    // from the signed-URL writer, which refuses to overwrite the existing
+    // path. The tradeoff is that pipelineContext captures pipeline state as
+    // of finalize-run, not post-publish — reportId is absent. Acceptable
+    // because runId is the primary join key and reportId is trivially
+    // looked up from Content Lake via runId.
+    if (!pipelineContextEmitted) {
+        await capturePipelineContext(ctx, state, results);
+    }
     exportPhase.close();
     return {
         belowCritical: state.belowCritical,

package/dist/pipeline/calculate-scores.d.ts CHANGED Viewed

@@ -47,6 +47,12 @@ export interface RawTestResult {
     };
     response: {
         output: string;
+        tokenUsage?: {
+            cached?: number;
+            completion?: number;
+            prompt?: number;
+            total?: number;
+        };
     };
     testCase?: {
         description?: string;

package/dist/pipeline/calculate-scores.js CHANGED Viewed

@@ -223,6 +223,7 @@ export function extractStoredTestResults(resultsPath) {
             }
             dimensions.push({ dimension, reason, score });
         }
+        const tokenUsage = result.response?.tokenUsage;
         testResults.push({
             area,
             cost: result.cost || undefined,
@@ -233,6 +234,7 @@ export function extractStoredTestResults(resultsPath) {
             responseOutput,
             ...(responseOutputTruncated && { responseOutputTruncated: true }),
             taskId,
+            ...(tokenUsage && { tokenUsage }),
             variant,
         });
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "3.1.1",
+  "version": "3.3.0",
   "private": false,
   "publishConfig": {
     "access": "public"