npm - @sanity/ailf - Versions diffs - 2.1.0 → 2.3.0 - Mend

@sanity/ailf 2.1.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54)

package/config/rubrics.ts +3 -3
package/dist/_vendor/ailf-core/examples/index.d.ts +50 -1
package/dist/_vendor/ailf-core/examples/index.js +66 -1
package/dist/_vendor/ailf-core/types/index.d.ts +25 -0
package/dist/agent-harness/assertions-runtime.d.ts +49 -0
package/dist/agent-harness/assertions-runtime.js +138 -0
package/dist/agent-harness/provider.d.ts +58 -0
package/dist/agent-harness/provider.js +104 -0
package/dist/commands/calculate-scores.js +7 -2
package/dist/commands/capture-list.d.ts +1 -1
package/dist/commands/capture-list.js +6 -3
package/dist/commands/compare.js +11 -7
package/dist/commands/explain-handler.js +22 -24
package/dist/commands/fetch-docs.js +4 -2
package/dist/commands/generate-configs.js +6 -2
package/dist/commands/init.js +3 -0
package/dist/commands/pipeline-action.js +8 -24
package/dist/commands/pipeline.js +1 -1
package/dist/commands/pr-comment.js +6 -2
package/dist/commands/publish.d.ts +1 -0
package/dist/commands/publish.js +12 -8
package/dist/commands/remote-pipeline.js +1 -1
package/dist/commands/remote-results.d.ts +8 -8
package/dist/commands/remote-results.js +7 -7
package/dist/commands/shared/options.d.ts +8 -0
package/dist/commands/shared/options.js +10 -0
package/dist/commands/shared/resolve-output-dir.d.ts +27 -0
package/dist/commands/shared/resolve-output-dir.js +36 -0
package/dist/composition-root.js +1 -1
package/dist/config/rubrics.ts +3 -3
package/dist/orchestration/build-app-context.js +1 -1
package/dist/orchestration/steps/gap-analysis-step.js +86 -75
package/dist/orchestration/steps/generate-configs-step.d.ts +7 -0
package/dist/orchestration/steps/generate-configs-step.js +47 -2
package/dist/pipeline/calculate-scores.js +113 -2
package/dist/pipeline/compare.js +50 -19
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +103 -25
package/dist/pipeline/compiler/compiler-to-yaml.js +78 -7
package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +15 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +42 -85
package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +22 -15
package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +8 -1
package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +42 -12
package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +3 -0
package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +1 -27
package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +2 -9
package/dist/pipeline/compiler/rubric-resolution.d.ts +40 -0
package/dist/pipeline/compiler/rubric-resolution.js +52 -0
package/dist/pipeline/compiler/scoring-bridge.js +59 -7
package/dist/pipeline/provenance.js +7 -1
package/dist/pipeline/validate.d.ts +5 -4
package/dist/pipeline/validate.js +34 -113
package/package.json +2 -1

package/dist/commands/calculate-scores.js CHANGED Viewed

@@ -9,18 +9,21 @@ import { fileURLToPath } from "url";
 import { Command } from "commander";
 import { createAppContext } from "../composition-root.js";
 import { calculateAndWriteScores } from "../pipeline/calculate-scores.js";
+import { addOutputDirOption } from "./shared/options.js";
+import { resolveOutputDir } from "./shared/resolve-output-dir.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
 export function createCalculateScoresCommand() {
-    return new Command("calculate-scores")
+    const cmd = new Command("calculate-scores")
         .description("Calculate AI Literacy Scores from Promptfoo evaluation results")
         .option("--source <name>", "Documentation source name")
         .argument("[results-path]", "Path to eval-results.json")
         .action(async (resultsPath, opts) => {
         try {
+            const outputDir = resolveOutputDir(opts.outputDir);
             const ctx = createAppContext({
                 rootDir: ROOT,
-                outputDir: resolve(ROOT, "results", "latest"),
+                outputDir,
                 mode: "literacy",
                 noAutoScope: false,
                 skipFetch: true,
@@ -53,4 +56,6 @@ export function createCalculateScoresCommand() {
                 console.error(err.message);
         }
     });
+    addOutputDirOption(cmd);
+    return cmd;
 }

package/dist/commands/capture-list.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * reads each manifest, and prints a summary table sorted by date.
  *
  * Usage:
- *   ailf capture list                          # default: results/captures/
+ *   ailf capture list                          # default: .ailf/results/captures/
  *   ailf capture list ./my-captures            # custom directory
  */
 import { Command } from "commander";

package/dist/commands/capture-list.js CHANGED Viewed

@@ -5,22 +5,25 @@
  * reads each manifest, and prints a summary table sorted by date.
  *
  * Usage:
- *   ailf capture list                          # default: results/captures/
+ *   ailf capture list                          # default: .ailf/results/captures/
  *   ailf capture list ./my-captures            # custom directory
  */
 import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
 import { join, resolve } from "node:path";
 import { Command } from "commander";
+import { resolveOutputDir } from "./shared/resolve-output-dir.js";
 // ---------------------------------------------------------------------------
 // Command factory
 // ---------------------------------------------------------------------------
 export function createCaptureListCommand() {
     return new Command("list")
         .description("List pipeline captures in a directory")
-        .argument("[dir]", "Captures directory (default: results/captures/)")
+        .argument("[dir]", "Captures directory (default: .ailf/results/captures/)")
         .option("-f, --format <fmt>", "Output format: table or json", "table")
         .action(async (dir, opts) => {
-        const captureDir = resolve(dir ?? "results/captures");
+        const captureDir = dir
+            ? resolve(dir)
+            : resolve(resolveOutputDir(), "..", "captures");
         if (!existsSync(captureDir)) {
             console.error(`  No captures directory found at ${captureDir}`);
             console.error("  Run 'ailf pipeline --capture' to create captures.");

package/dist/commands/compare.js CHANGED Viewed

@@ -9,29 +9,31 @@ import { dirname, join, resolve } from "path";
 import { fileURLToPath } from "url";
 import { Command } from "commander";
 import { compare } from "../pipeline/compare.js";
+import { addOutputDirOption } from "./shared/options.js";
+import { resolveOutputDir } from "./shared/resolve-output-dir.js";
 import { DEFAULT_NOISE_THRESHOLD, } from "../pipeline/types.js";
 import { formatComparisonTable } from "../_vendor/ailf-core/index.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
 const BASELINES_DIR = join(ROOT, "results", "baselines");
-const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
 // ---------------------------------------------------------------------------
 // Helpers
 // ---------------------------------------------------------------------------
 export function createCompareCommand() {
-    return new Command("compare")
+    const cmd = new Command("compare")
         .description("Compare two evaluation score summaries")
         .option("-b, --baseline <path>", "Baseline score-summary.json (default: latest baseline)")
-        .option("-e, --experiment <path>", "Experiment score-summary.json (default: results/latest/score-summary.json)")
+        .option("-e, --experiment <path>", "Experiment score-summary.json (default: .ailf/results/latest/score-summary.json)")
         .option("-t, --threshold <n>", "Noise threshold for unchanged classification", parseFloat)
         .option("-o, --output <path>", "Write JSON report to file")
         .option("-f, --format <fmt>", "Output format: table or json", "table")
         .action(async (opts) => {
+        const outputDir = resolveOutputDir(opts.outputDir);
         const threshold = opts.threshold ?? DEFAULT_NOISE_THRESHOLD;
         // Resolve experiment path
         const expPath = opts.experiment
             ? resolve(opts.experiment)
-            : SCORE_SUMMARY_PATH;
+            : join(outputDir, "score-summary.json");
         const experiment = loadSummary(expPath);
         // Resolve baseline path
         let basePath;
@@ -48,7 +50,7 @@ export function createCompareCommand() {
         }
         const baseline = loadSummary(basePath);
         // Try to load grader consistency data for empirical thresholds
-        const consistencyPath = join(ROOT, "results", "latest", "grader-consistency.json");
+        const consistencyPath = join(outputDir, "grader-consistency.json");
         let graderConsistency;
         if (existsSync(consistencyPath) && opts.threshold === undefined) {
             try {
@@ -93,10 +95,12 @@ export function createCompareCommand() {
                 console.log(`  ✅ Comparison report also written to ${opts.output}`);
             }
         }
-        // Write comparison report to results/latest for other steps to consume
-        const latestComparisonPath = join(ROOT, "results", "latest", "comparison-report.json");
+        // Write comparison report to output dir for other steps to consume
+        const latestComparisonPath = join(outputDir, "comparison-report.json");
         writeFileSync(latestComparisonPath, JSON.stringify(report, null, 2));
     });
+    addOutputDirOption(cmd);
+    return cmd;
 }
 function findLatestBaseline() {
     if (!existsSync(BASELINES_DIR))

package/dist/commands/explain-handler.js CHANGED Viewed

@@ -23,6 +23,7 @@ import { TASK_FILE_NAMES } from "../_vendor/ailf-core/index.js";
 import { buildPipelinePlan, buildSimpleCommandPlan, } from "../pipeline/plan.js";
 import { formatPlanConsole, formatPlanJson } from "../pipeline/plan-format.js";
 import { computeResolvedOptions } from "./pipeline-action.js";
+import { getCallerCwd } from "./shared/resolve-output-dir.js";
 import { LiteracyVariant } from "../pipeline/normalize-mode.js";
 // ---------------------------------------------------------------------------
 // Registry
@@ -43,10 +44,10 @@ const EXPLAIN_REGISTRY = {
     "agent-report": {
         description: "Generate an agent behavior observation report from eval results",
         filesCreated: [
-            "results/latest/agent-report.json",
-            "results/latest/agent-report.md",
+            "<outputDir>/agent-report.json",
+            "<outputDir>/agent-report.md",
         ],
-        filesRead: ["results/latest/eval-results.json"],
+        filesRead: ["<outputDir>/eval-results.json"],
         steps: [
             {
                 cacheStatus: "miss",
@@ -82,9 +83,9 @@ const EXPLAIN_REGISTRY = {
     },
     "calculate-scores": {
         description: "Calculate AI Literacy Scores from Promptfoo evaluation results",
-        filesCreated: ["results/latest/score-summary.json"],
+        filesCreated: ["<outputDir>/score-summary.json"],
         filesRead: [
-            "results/latest/eval-results.json",
+            "<outputDir>/eval-results.json",
             "config/rubrics.ts",
             "config/models.ts",
         ],
@@ -104,23 +105,20 @@ const EXPLAIN_REGISTRY = {
             {
                 cacheStatus: "miss",
                 name: "Write summary",
-                reason: "Persist score-summary.json to results/latest/",
+                reason: "Persist score-summary.json to output directory",
                 willRun: true,
             },
         ],
     },
     compare: {
         description: "Compare current evaluation scores against a saved baseline",
-        filesCreated: ["results/latest/comparison-report.json"],
-        filesRead: [
-            "results/latest/score-summary.json",
-            "results/baselines/*.json",
-        ],
+        filesCreated: ["<outputDir>/comparison-report.json"],
+        filesRead: ["<outputDir>/score-summary.json", "results/baselines/*.json"],
         steps: [
             {
                 cacheStatus: "miss",
                 name: "Load current scores",
-                reason: "Read results/latest/score-summary.json",
+                reason: "Read <outputDir>/score-summary.json",
                 willRun: true,
             },
             {
@@ -181,8 +179,8 @@ const EXPLAIN_REGISTRY = {
     },
     "discovery-report": {
         description: "Generate agent discoverability report from agentic retrieval metrics",
-        filesCreated: ["results/latest/discovery-report.md"],
-        filesRead: ["results/latest/score-summary.json"],
+        filesCreated: ["<outputDir>/discovery-report.md"],
+        filesRead: ["<outputDir>/score-summary.json"],
         steps: [
             {
                 cacheStatus: "miss",
@@ -206,7 +204,7 @@ const EXPLAIN_REGISTRY = {
     },
     eval: {
         description: "Run Promptfoo evaluation directly (passthrough — all flags forwarded to promptfoo)",
-        filesCreated: ["results/latest/eval-results.json"],
+        filesCreated: ["<outputDir>/eval-results.json"],
         filesRead: ["promptfooconfig.yaml"],
         steps: [
             {
@@ -280,7 +278,7 @@ const EXPLAIN_REGISTRY = {
     grader: {
         description: "Grader reliability tools (consistency, compare, sensitivity, validate)",
         filesRead: [
-            "results/latest/eval-results.json",
+            "<outputDir>/eval-results.json",
             "config/rubrics.ts",
             "canonical/reference-solutions/",
         ],
@@ -312,7 +310,7 @@ const EXPLAIN_REGISTRY = {
     },
     "measure-retrieval": {
         description: "Measure Sanity text search retrieval quality against canonical document annotations",
-        filesCreated: ["results/latest/retrieval-metrics.json"],
+        filesCreated: ["<outputDir>/retrieval-metrics.json"],
         filesRead: ["tasks/literacy/*.task.ts"],
         steps: [
             {
@@ -337,7 +335,7 @@ const EXPLAIN_REGISTRY = {
     },
     "pr-comment": {
         description: "Generate a markdown PR comment from evaluation scores for CI posting",
-        filesRead: ["results/latest/score-summary.json"],
+        filesRead: ["<outputDir>/score-summary.json"],
         steps: [
             {
                 cacheStatus: "miss",
@@ -355,7 +353,7 @@ const EXPLAIN_REGISTRY = {
     },
     publish: {
         description: "Publish a local evaluation report to the Sanity Content Lake (standalone)",
-        filesRead: ["results/latest/score-summary.json"],
+        filesRead: ["<outputDir>/score-summary.json"],
         steps: [
             {
                 cacheStatus: "miss",
@@ -386,12 +384,12 @@ const EXPLAIN_REGISTRY = {
     "readiness-report": {
         description: "Generate launch readiness checklist for a feature area with threshold evaluation",
         filesRead: [
-            "results/latest/score-summary.json",
-            "results/latest/gap-analysis.json",
+            "<outputDir>/score-summary.json",
+            "<outputDir>/gap-analysis.json",
             "config/thresholds.ts",
             "results/baselines/",
         ],
-        filesCreated: ["results/latest/readiness-report.md"],
+        filesCreated: ["<outputDir>/readiness-report.md"],
         steps: [
             {
                 cacheStatus: "miss",
@@ -603,7 +601,7 @@ function buildInitExplainPlan(actionCommand, rootDir) {
     const configFile = format === "ts"
         ? "ailf.config.ts"
         : `config.${format === "yaml" ? "yaml" : "json"}`;
-    const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
+    const callerCwd = getCallerCwd();
     const targetDir = opts.path ?? ".";
     const ailfDir = `${targetDir}/.ailf`;
     const tasksDir = `${ailfDir}/tasks`;
@@ -664,7 +662,7 @@ function buildBaselineExplainPlan(actionCommand, rootDir) {
         command: `baseline ${subcommand}`,
         description: descriptions[subcommand] ?? `Baseline operation: ${subcommand}`,
         filesCreated: subcommand === "save" ? ["results/baselines/<timestamp>.json"] : [],
-        filesRead: ["results/latest/score-summary.json", "results/baselines/"],
+        filesRead: ["<outputDir>/score-summary.json", "results/baselines/"],
         rootDir,
     });
 }

package/dist/commands/fetch-docs.js CHANGED Viewed

@@ -11,7 +11,8 @@ import { Command } from "commander";
 import { createAppContext } from "../composition-root.js";
 import { loadSource } from "../sources.js";
 import { configToSourceOverrides } from "../orchestration/config-to-source-overrides.js";
-import { addSanitySourceOptions } from "./shared/options.js";
+import { addOutputDirOption, addSanitySourceOptions } from "./shared/options.js";
+import { resolveOutputDir } from "./shared/resolve-output-dir.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
 export function createFetchDocsCommand() {
@@ -31,6 +32,7 @@ export function createFetchDocsCommand() {
         }
     });
     addSanitySourceOptions(cmd);
+    addOutputDirOption(cmd);
     return cmd;
 }
 // ---------------------------------------------------------------------------
@@ -41,7 +43,7 @@ async function executeFetchDocs(opts) {
     // Build a minimal ResolvedConfig for the composition root
     const ctx = createAppContext({
         rootDir: ROOT,
-        outputDir: resolve(ROOT, "results", "latest"),
+        outputDir: resolveOutputDir(opts.outputDir),
         mode: "literacy",
         noAutoScope: false,
         skipFetch: false,

package/dist/commands/generate-configs.js CHANGED Viewed

@@ -9,17 +9,19 @@ import { fileURLToPath } from "url";
 import { Command } from "commander";
 import { createAppContext } from "../composition-root.js";
 import { GenerateConfigsStep } from "../orchestration/steps/generate-configs-step.js";
+import { addOutputDirOption } from "./shared/options.js";
+import { resolveOutputDir } from "./shared/resolve-output-dir.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
 export function createGenerateConfigsCommand() {
-    return new Command("generate-configs")
+    const cmd = new Command("generate-configs")
         .description("Generate promptfoo config files from config/models.yaml")
         .option("-s, --source <name>", "Documentation source name")
         .action(async (opts) => {
         try {
             const ctx = createAppContext({
                 rootDir: ROOT,
-                outputDir: resolve(ROOT, "results", "latest"),
+                outputDir: resolveOutputDir(opts.outputDir),
                 mode: "literacy",
                 noAutoScope: false,
                 skipFetch: true,
@@ -58,4 +60,6 @@ export function createGenerateConfigsCommand() {
                 console.error(err.message);
         }
     });
+    addOutputDirOption(cmd);
+    return cmd;
 }

package/dist/commands/init.js CHANGED Viewed

@@ -138,6 +138,9 @@ async function runInit(opts) {
     else if (modeFilter === "knowledge-probe") {
         stemsToWrite = taskStemsForMode("knowledge-probe");
     }
+    else if (modeFilter === "agent-harness") {
+        stemsToWrite = taskStemsForMode("agent-harness");
+    }
     else {
         // Default (no --mode): write all tasks
         stemsToWrite = [...TASK_FILE_NAMES];

package/dist/commands/pipeline-action.js CHANGED Viewed

@@ -21,6 +21,7 @@ import { buildStepSequence } from "../orchestration/build-step-sequence.js";
 import { orchestratePipeline } from "../orchestration/pipeline-orchestrator.js";
 import { load } from "js-yaml";
 import { parseRepoConfig, } from "../adapters/task-sources/repo-schemas.js";
+import { getCallerCwd, resolveOutputDir } from "./shared/resolve-output-dir.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
 // ---------------------------------------------------------------------------
@@ -35,7 +36,7 @@ const VALID_SEARCH_MODES = ["open", "origin-only", "off"];
  */
 export function computeResolvedOptions(opts) {
     // Resolve paths relative to the caller's cwd, not the eval package root
-    const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
+    const callerCwd = getCallerCwd();
     // Validate + normalize mode via the single boundary function.
     // normalizeMode() maps legacy variant names (baseline, agentic, etc.)
     // to canonical mode "literacy" + variant, and throws on invalid input.
@@ -209,23 +210,12 @@ export function computeResolvedOptions(opts) {
     const remote = opts.remote || process.env.AILF_REMOTE === "1";
     const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
     const apiKey = process.env.AILF_API_KEY ?? undefined;
-    // Output directory: explicit flag → repo-task heuristic → default
+    // Output directory: explicit --output-dir → $CWD/.ailf/results/latest/
     const resolvedRepoTasksPath = opts.repoTasksPath
         ? resolve(callerCwd, opts.repoTasksPath)
         : undefined;
     const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
-    let outputDir;
-    if (opts.outputDir) {
-        outputDir = resolve(callerCwd, opts.outputDir);
-    }
-    else if (resolvedTaskSourceType === "repo" || resolvedRepoTasksPath) {
-        outputDir = resolvedRepoTasksPath
-            ? resolve(resolvedRepoTasksPath, "..", "results", "latest")
-            : resolve(callerCwd, ".ailf", "results", "latest");
-    }
-    else {
-        outputDir = resolve(ROOT, "results", "latest");
-    }
+    const outputDir = resolveOutputDir(opts.outputDir);
     return {
         allowedOriginArgs,
         apiKey,
@@ -310,7 +300,7 @@ export async function executePipeline(cliOpts) {
         }
         const { FileConfigAdapter } = await import("../adapters/config-sources/file-config-adapter.js");
         const { createAppContext } = await import("../composition-root.js");
-        const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
+        const callerCwd = getCallerCwd();
         const adapter = new FileConfigAdapter(cliOpts.config, ROOT);
         const config = await adapter.resolve();
         // Merge CLI-only flags that aren't in the config file.
@@ -323,13 +313,8 @@ export async function executePipeline(cliOpts) {
         if (cliOpts.output) {
             config.outputPath = resolve(callerCwd, cliOpts.output);
         }
-        // Output dir: explicit CLI flag → repo-task heuristic → file-config default
-        if (cliOpts.outputDir) {
-            config.outputDir = resolve(callerCwd, cliOpts.outputDir);
-        }
-        else if (config.repoTasksPath) {
-            config.outputDir = resolve(config.repoTasksPath, "..", "results", "latest");
-        }
+        // Output dir: explicit CLI flag → $CWD/.ailf/results/latest/
+        config.outputDir = resolveOutputDir(cliOpts.outputDir);
         // Create AppContext directly from the merged config so adapters
         // (especially taskSource) are wired from the file config's
         // taskSourceType — not from CLI defaults.
@@ -350,8 +335,7 @@ export async function executePipeline(cliOpts) {
     // cache which never contains .ailf/.
     if (o.remote) {
         const { runRemotePipeline } = await import("./remote-pipeline.js");
-        const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
-        await runRemotePipeline(o, callerCwd);
+        await runRemotePipeline(o, getCallerCwd());
         return;
     }
     // Dry-run: validate only, don't execute steps

package/dist/commands/pipeline.js CHANGED Viewed

@@ -55,7 +55,7 @@ export function createPipelineCommand() {
         .option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
         .option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
         .option("--capture", "Enable artifact capture for this run", false)
-        .option("--capture-dir <path>", "Base directory for capture output (default: results/captures/)")
+        .option("--capture-dir <path>", "Base directory for capture output (default: .ailf/results/captures/)")
         .option("--no-capture-compress", "Disable tar.gz compression of captures")
         .option("--no-capture-extras", "Exclude mode-specific artifacts from captures")
         .action(async (opts) => {

package/dist/commands/pr-comment.js CHANGED Viewed

@@ -9,10 +9,12 @@ import { fileURLToPath } from "url";
 import { Command } from "commander";
 import { createAppContext } from "../composition-root.js";
 import { generatePrComment } from "../pipeline/pr-comment.js";
+import { addOutputDirOption } from "./shared/options.js";
+import { resolveOutputDir } from "./shared/resolve-output-dir.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
 export function createPrCommentCommand() {
-    return new Command("pr-comment")
+    const cmd = new Command("pr-comment")
         .description("Generate a markdown PR comment from evaluation scores")
         .option("--output <path>", "Write comment to file (default: stdout)")
         .option("--promptfoo-url <url>", "Promptfoo share URL to include")
@@ -20,7 +22,7 @@ export function createPrCommentCommand() {
         try {
             const ctx = createAppContext({
                 rootDir: ROOT,
-                outputDir: resolve(ROOT, "results", "latest"),
+                outputDir: resolveOutputDir(opts.outputDir),
                 mode: "literacy",
                 noAutoScope: false,
                 skipFetch: true,
@@ -48,4 +50,6 @@ export function createPrCommentCommand() {
                 console.error(err.message);
         }
     });
+    addOutputDirOption(cmd);
+    return cmd;
 }

package/dist/commands/publish.d.ts CHANGED Viewed

@@ -21,6 +21,7 @@
 import { Command } from "commander";
 export interface PublishCommandOptions {
     dryRun: boolean;
+    outputDir?: string;
     tag?: string;
 }
 export declare function createPublishCommand(): Command;

package/dist/commands/publish.js CHANGED Viewed

@@ -23,22 +23,27 @@ import { dirname, resolve } from "path";
 import { fileURLToPath } from "url";
 import { Command } from "commander";
 import { createAppContext } from "../composition-root.js";
+import { addOutputDirOption } from "./shared/options.js";
+import { getCallerCwd, resolveOutputDir } from "./shared/resolve-output-dir.js";
 import { buildProvenance, } from "../pipeline/provenance.js";
 import { generateReportTitle } from "../pipeline/report-title.js";
 import { generateReportId, } from "../report-store.js";
 import { withRetry } from "../sinks/retry.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
-const DEFAULT_SUMMARY_PATH = resolve(ROOT, "results", "latest", "score-summary.json");
 export function createPublishCommand() {
-    return new Command("publish")
+    const cmd = new Command("publish")
         .description("Publish a local evaluation report to the Sanity Content Lake")
-        .argument("[summary-path]", "Path to score-summary.json", DEFAULT_SUMMARY_PATH)
+        .argument("[summary-path]", "Path to score-summary.json")
         .option("-t, --tag <tag>", "Label for the published report")
         .option("-n, --dry-run", "Preview the report without writing to Sanity or sinks", false)
         .action(async (summaryPath, opts) => {
-        await runPublishCommand(summaryPath, opts);
+        const outputDir = resolveOutputDir(opts.outputDir);
+        const effectivePath = summaryPath ?? resolve(outputDir, "score-summary.json");
+        await runPublishCommand(effectivePath, outputDir, opts);
     });
+    addOutputDirOption(cmd);
+    return cmd;
 }
 // ---------------------------------------------------------------------------
 // Provenance builder (from score summary, not full pipeline context)
@@ -77,7 +82,7 @@ function buildProvenanceFromSummary(summary) {
 // ---------------------------------------------------------------------------
 // Command implementation
 // ---------------------------------------------------------------------------
-async function runPublishCommand(summaryPath, opts) {
+async function runPublishCommand(summaryPath, outputDir, opts) {
     // Wire up infrastructure via composition root
     const ctx = createAppContext({
         compareEnabled: false,
@@ -87,7 +92,7 @@ async function runPublishCommand(summaryPath, opts) {
         noAutoScope: false,
         noCache: true,
         noRemoteCache: true,
-        outputDir: resolve(ROOT, "results", "latest"),
+        outputDir,
         publishEnabled: true,
         publishTag: opts.tag,
         readinessEnabled: false,
@@ -106,8 +111,7 @@ async function runPublishCommand(summaryPath, opts) {
     // -----------------------------------------------------------------------
     // 1. Resolve and read the score summary
     // -----------------------------------------------------------------------
-    const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
-    const resolvedPath = resolve(callerCwd, summaryPath);
+    const resolvedPath = resolve(getCallerCwd(), summaryPath);
     if (!existsSync(resolvedPath)) {
         console.error(`  ✖ File not found: ${resolvedPath}`);
         console.error();

package/dist/commands/remote-pipeline.js CHANGED Viewed

@@ -88,7 +88,7 @@ export async function runRemotePipeline(opts, rootDir) {
     }
     // 7. Fetch and write output artifacts
     await writeRemoteResults(client, job, {
-        rootDir,
+        outputDir: opts.outputDir,
         outputPath: opts.outputPath,
         apiUrl: opts.apiUrl,
     });

package/dist/commands/remote-results.d.ts CHANGED Viewed

@@ -4,9 +4,9 @@
  * Produces the same file layout as local mode so downstream tools
  * (workflow PR comments, score comparison, baseline save) work unchanged:
  *
- *   results/latest/score-summary.json  — scores by area + overall
- *   results/latest/report.md           — rendered markdown report
- *   results/latest/job-metadata.json   — job ID, timing, API URL
+ *   <outputDir>/score-summary.json  — scores by area + overall
+ *   <outputDir>/report.md           — rendered markdown report
+ *   <outputDir>/job-metadata.json   — job ID, timing, API URL
  *
  * @see packages/eval/src/commands/remote-pipeline.ts — caller
  */
@@ -14,8 +14,8 @@ import type { ApiClient } from "../adapters/api-client/api-client.js";
 import type { JobResponse } from "../adapters/api-client/types.js";
 /** Options for writing remote results. */
 export interface WriteResultsOptions {
-    /** Eval package root directory (for results/latest/ path). */
-    rootDir: string;
+    /** Base directory for output artifacts. */
+    outputDir: string;
     /** Optional output path override (--output flag). */
     outputPath?: string;
     /** API base URL (for metadata). */
@@ -25,9 +25,9 @@ export interface WriteResultsOptions {
  * Fetch report artifacts from the API and write them to disk.
  *
  * Writes:
- * - `results/latest/score-summary.json` — score data from job response
- * - `results/latest/report.md` — full markdown report (if reportId present)
- * - `results/latest/job-metadata.json` — job tracking info
+ * - `<outputDir>/score-summary.json` — score data from job response
+ * - `<outputDir>/report.md` — full markdown report (if reportId present)
+ * - `<outputDir>/job-metadata.json` — job tracking info
  * - `--output` path — markdown report (if specified)
  */
 export declare function writeRemoteResults(client: ApiClient, job: JobResponse, options: WriteResultsOptions): Promise<void>;

package/dist/commands/remote-results.js CHANGED Viewed

@@ -4,9 +4,9 @@
  * Produces the same file layout as local mode so downstream tools
  * (workflow PR comments, score comparison, baseline save) work unchanged:
  *
- *   results/latest/score-summary.json  — scores by area + overall
- *   results/latest/report.md           — rendered markdown report
- *   results/latest/job-metadata.json   — job ID, timing, API URL
+ *   <outputDir>/score-summary.json  — scores by area + overall
+ *   <outputDir>/report.md           — rendered markdown report
+ *   <outputDir>/job-metadata.json   — job ID, timing, API URL
  *
  * @see packages/eval/src/commands/remote-pipeline.ts — caller
  */
@@ -19,13 +19,13 @@ import { resolve } from "path";
  * Fetch report artifacts from the API and write them to disk.
  *
  * Writes:
- * - `results/latest/score-summary.json` — score data from job response
- * - `results/latest/report.md` — full markdown report (if reportId present)
- * - `results/latest/job-metadata.json` — job tracking info
+ * - `<outputDir>/score-summary.json` — score data from job response
+ * - `<outputDir>/report.md` — full markdown report (if reportId present)
+ * - `<outputDir>/job-metadata.json` — job tracking info
  * - `--output` path — markdown report (if specified)
  */
 export async function writeRemoteResults(client, job, options) {
-    const resultsDir = resolve(options.rootDir, "results", "latest");
+    const resultsDir = options.outputDir;
     mkdirSync(resultsDir, { recursive: true });
     // 1. Write score summary
     const scoreSummary = buildScoreSummary(job);

package/dist/commands/shared/options.d.ts CHANGED Viewed

@@ -18,6 +18,14 @@ export declare function addDebugOptions(cmd: Command): Command;
  * Add output options: --output, --format
  */
 export declare function addOutputOptions(cmd: Command): Command;
+/**
+ * Add --output-dir option for commands that write pipeline artifacts.
+ *
+ * Pair with `resolveOutputDir()` from `./resolve-output-dir.js` to resolve
+ * the value. When omitted, `resolveOutputDir()` defaults to
+ * `$CWD/.ailf/results/latest/`.
+ */
+export declare function addOutputDirOption(cmd: Command): Command;
 /**
  * Add Sanity source options: --sanity-dataset, --sanity-project, etc.
  */