npm - @sanity/ailf - Versions diffs - 2.2.0 → 2.3.1 - Mend

@sanity/ailf 2.2.0 → 2.3.1

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (47)

package/config/rubrics.ts +3 -3
package/dist/_vendor/ailf-core/types/index.d.ts +25 -0
package/dist/adapters/task-sources/content-lake-task-source.js +15 -7
package/dist/commands/calculate-scores.js +7 -2
package/dist/commands/capture-list.d.ts +1 -1
package/dist/commands/capture-list.js +6 -3
package/dist/commands/compare.js +11 -7
package/dist/commands/explain-handler.js +22 -24
package/dist/commands/fetch-docs.js +4 -2
package/dist/commands/generate-configs.js +6 -2
package/dist/commands/pipeline-action.js +8 -24
package/dist/commands/pipeline.js +1 -1
package/dist/commands/pr-comment.js +6 -2
package/dist/commands/publish.d.ts +1 -0
package/dist/commands/publish.js +12 -8
package/dist/commands/remote-pipeline.js +1 -1
package/dist/commands/remote-results.d.ts +8 -8
package/dist/commands/remote-results.js +7 -7
package/dist/commands/shared/options.d.ts +8 -0
package/dist/commands/shared/options.js +10 -0
package/dist/commands/shared/resolve-output-dir.d.ts +27 -0
package/dist/commands/shared/resolve-output-dir.js +36 -0
package/dist/composition-root.js +1 -1
package/dist/config/rubrics.ts +3 -3
package/dist/orchestration/build-app-context.js +1 -1
package/dist/orchestration/steps/fetch-docs-step.js +23 -9
package/dist/orchestration/steps/gap-analysis-step.js +86 -75
package/dist/orchestration/steps/generate-configs-step.d.ts +15 -0
package/dist/orchestration/steps/generate-configs-step.js +56 -0
package/dist/orchestration/steps/run-eval-step.js +14 -0
package/dist/pipeline/calculate-scores.js +113 -2
package/dist/pipeline/compare.js +50 -19
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +64 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +6 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +14 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +3 -0
package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +1 -27
package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +2 -9
package/dist/pipeline/compiler/rubric-resolution.d.ts +40 -0
package/dist/pipeline/compiler/rubric-resolution.js +52 -0
package/dist/pipeline/compiler/scoring-bridge.js +59 -7
package/dist/pipeline/provenance.js +7 -1
package/dist/pipeline/validate.d.ts +5 -4
package/dist/pipeline/validate.js +34 -113
package/dist/webhook/eval-request-handler.js +4 -0
package/package.json +1 -1

package/config/rubrics.ts CHANGED Viewed

@@ -201,9 +201,9 @@ export default defineRubrics({
       currency: 0.2,
     },
     "agent-harness": {
-      "agent-output": 0.45,
-      "tool-usage": 0.4,
-      "process-quality": 0.15,
+      "assertion-pass-rate": 0.35,
+      "agent-output": 0.35,
+      "tool-usage": 0.3,
     },
   },

package/dist/_vendor/ailf-core/types/index.d.ts CHANGED Viewed

@@ -126,31 +126,56 @@ export interface FeatureScore {
      * Only present when agentic evaluation data is available.
      */
     actualScore?: number;
+    /**
+     * Assertion pass rate — fraction of structural assertions that passed (0–100).
+     * Only present for modes with javascript assertions (agent-harness, agent-task).
+     */
+    assertionPassRate?: number;
     /**
      * Ceiling score — gold-standard docs injected directly.
      * This is the theoretical maximum score for this area given the current docs.
+     * Set to 0 for modes without with/without-docs variants (agent-harness).
      */
     ceilingScore: number;
     codeCorrectness: number;
+    /**
+     * Generic dimension scores map — all dimensions by kebab-case name (0–100).
+     * Includes the three named fields above plus any mode-specific dimensions
+     * (e.g., agent-output, tool-usage, assertion-pass-rate).
+     * New consumers should read from this map. The named fields are backward-
+     * compatible accessors populated from it.
+     */
+    dimensions?: Record<string, number>;
     docCoverage: number;
     /** Sanity documents used for this feature area's evaluation */
     documents?: DocumentRef[];
     /**
      * Doc Lift — documentation quality contribution (ceiling − floor).
      * Positive when docs help, negative when docs hurt (interference).
+     * Set to 0 for modes without with/without-docs variants (agent-harness).
      */
     docLift: number;
     /**
      * Doc quality gap — room for documentation improvement (100 − ceiling).
      * Lower is better.
+     * Set to 0 for modes without with/without-docs variants (agent-harness).
      */
     docQualityGap: number;
     feature: string;
     /**
      * Floor score — no docs, training data only.
      * The model's inherent knowledge baseline.
+     * Set to 0 for modes without with/without-docs variants (agent-harness).
      */
     floorScore: number;
+    /**
+     * How this score entry was grouped.
+     *   - "feature": by documentation feature area (literacy mode)
+     *   - "task": by individual task ID (agent-harness mode)
+     *   - "aggregate": single aggregate across all tasks
+     * Defaults to "feature" when absent (backward compatibility).
+     */
+    groupType?: "aggregate" | "feature" | "task";
     /**
      * Infrastructure efficiency — actual / ceiling (0.0–1.0).
      * What fraction of documentation quality reaches agents through discovery?

package/dist/adapters/task-sources/content-lake-task-source.js CHANGED Viewed

@@ -28,7 +28,13 @@
  */
 const TASKS_QUERY = /* groq */ `
 *[_type == "ailf.task"
-  && (!defined($areas) || area->areaId.current in $areas)
+  && (
+    !defined($areas)
+    // Current field name
+    || area->areaId.current in $areas
+    // Legacy field name (pre-schema-rename documents)
+    || featureArea->areaId.current in $areas
+  )
   && (!defined($taskIds) || id.current in $taskIds)
   && (
     // Status-based filtering (unified — replaces execution.enabled)
@@ -39,13 +45,15 @@ const TASKS_QUERY = /* groq */ `
     || (defined($taskIds) && status != "archived")
   )
   && (!defined($tags) || count((tags)[@ in $tags]) > 0)
-] | order(area->areaId.current asc, id.current asc) {
+] | order(coalesce(area->areaId.current, featureArea->areaId.current) asc, id.current asc) {
   "taskId": id.current,
-  title,
-  "areaId": area->areaId.current,
-  promptText,
+  // Coalesce current and legacy field names so documents created before
+  // the schema rename are still readable.
+  "title": coalesce(title, description),
+  "areaId": coalesce(area->areaId.current, featureArea->areaId.current),
+  "promptText": coalesce(promptText, taskPrompt),
   docCoverage,
-  "contextDocs": contextDocs[] {
+  "contextDocs": coalesce(contextDocs, canonicalDocs)[] {
     refType,
     "slug": doc->slug.current,
     "docRefId": doc->_id,
@@ -55,7 +63,7 @@ const TASKS_QUERY = /* groq */ `
     perspective,
     reason
   },
-  assertions,
+  "assertions": coalesce(assertions, assert),
   rawAssert,
   baseline,
   tags,

package/dist/commands/calculate-scores.js CHANGED Viewed

@@ -9,18 +9,21 @@ import { fileURLToPath } from "url";
 import { Command } from "commander";
 import { createAppContext } from "../composition-root.js";
 import { calculateAndWriteScores } from "../pipeline/calculate-scores.js";
+import { addOutputDirOption } from "./shared/options.js";
+import { resolveOutputDir } from "./shared/resolve-output-dir.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
 export function createCalculateScoresCommand() {
-    return new Command("calculate-scores")
+    const cmd = new Command("calculate-scores")
         .description("Calculate AI Literacy Scores from Promptfoo evaluation results")
         .option("--source <name>", "Documentation source name")
         .argument("[results-path]", "Path to eval-results.json")
         .action(async (resultsPath, opts) => {
         try {
+            const outputDir = resolveOutputDir(opts.outputDir);
             const ctx = createAppContext({
                 rootDir: ROOT,
-                outputDir: resolve(ROOT, "results", "latest"),
+                outputDir,
                 mode: "literacy",
                 noAutoScope: false,
                 skipFetch: true,
@@ -53,4 +56,6 @@ export function createCalculateScoresCommand() {
                 console.error(err.message);
         }
     });
+    addOutputDirOption(cmd);
+    return cmd;
 }

package/dist/commands/capture-list.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * reads each manifest, and prints a summary table sorted by date.
  *
  * Usage:
- *   ailf capture list                          # default: results/captures/
+ *   ailf capture list                          # default: .ailf/results/captures/
  *   ailf capture list ./my-captures            # custom directory
  */
 import { Command } from "commander";

package/dist/commands/capture-list.js CHANGED Viewed

@@ -5,22 +5,25 @@
  * reads each manifest, and prints a summary table sorted by date.
  *
  * Usage:
- *   ailf capture list                          # default: results/captures/
+ *   ailf capture list                          # default: .ailf/results/captures/
  *   ailf capture list ./my-captures            # custom directory
  */
 import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
 import { join, resolve } from "node:path";
 import { Command } from "commander";
+import { resolveOutputDir } from "./shared/resolve-output-dir.js";
 // ---------------------------------------------------------------------------
 // Command factory
 // ---------------------------------------------------------------------------
 export function createCaptureListCommand() {
     return new Command("list")
         .description("List pipeline captures in a directory")
-        .argument("[dir]", "Captures directory (default: results/captures/)")
+        .argument("[dir]", "Captures directory (default: .ailf/results/captures/)")
         .option("-f, --format <fmt>", "Output format: table or json", "table")
         .action(async (dir, opts) => {
-        const captureDir = resolve(dir ?? "results/captures");
+        const captureDir = dir
+            ? resolve(dir)
+            : resolve(resolveOutputDir(), "..", "captures");
         if (!existsSync(captureDir)) {
             console.error(`  No captures directory found at ${captureDir}`);
             console.error("  Run 'ailf pipeline --capture' to create captures.");

package/dist/commands/compare.js CHANGED Viewed

@@ -9,29 +9,31 @@ import { dirname, join, resolve } from "path";
 import { fileURLToPath } from "url";
 import { Command } from "commander";
 import { compare } from "../pipeline/compare.js";
+import { addOutputDirOption } from "./shared/options.js";
+import { resolveOutputDir } from "./shared/resolve-output-dir.js";
 import { DEFAULT_NOISE_THRESHOLD, } from "../pipeline/types.js";
 import { formatComparisonTable } from "../_vendor/ailf-core/index.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
 const BASELINES_DIR = join(ROOT, "results", "baselines");
-const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
 // ---------------------------------------------------------------------------
 // Helpers
 // ---------------------------------------------------------------------------
 export function createCompareCommand() {
-    return new Command("compare")
+    const cmd = new Command("compare")
         .description("Compare two evaluation score summaries")
         .option("-b, --baseline <path>", "Baseline score-summary.json (default: latest baseline)")
-        .option("-e, --experiment <path>", "Experiment score-summary.json (default: results/latest/score-summary.json)")
+        .option("-e, --experiment <path>", "Experiment score-summary.json (default: .ailf/results/latest/score-summary.json)")
         .option("-t, --threshold <n>", "Noise threshold for unchanged classification", parseFloat)
         .option("-o, --output <path>", "Write JSON report to file")
         .option("-f, --format <fmt>", "Output format: table or json", "table")
         .action(async (opts) => {
+        const outputDir = resolveOutputDir(opts.outputDir);
         const threshold = opts.threshold ?? DEFAULT_NOISE_THRESHOLD;
         // Resolve experiment path
         const expPath = opts.experiment
             ? resolve(opts.experiment)
-            : SCORE_SUMMARY_PATH;
+            : join(outputDir, "score-summary.json");
         const experiment = loadSummary(expPath);
         // Resolve baseline path
         let basePath;
@@ -48,7 +50,7 @@ export function createCompareCommand() {
         }
         const baseline = loadSummary(basePath);
         // Try to load grader consistency data for empirical thresholds
-        const consistencyPath = join(ROOT, "results", "latest", "grader-consistency.json");
+        const consistencyPath = join(outputDir, "grader-consistency.json");
         let graderConsistency;
         if (existsSync(consistencyPath) && opts.threshold === undefined) {
             try {
@@ -93,10 +95,12 @@ export function createCompareCommand() {
                 console.log(`  ✅ Comparison report also written to ${opts.output}`);
             }
         }
-        // Write comparison report to results/latest for other steps to consume
-        const latestComparisonPath = join(ROOT, "results", "latest", "comparison-report.json");
+        // Write comparison report to output dir for other steps to consume
+        const latestComparisonPath = join(outputDir, "comparison-report.json");
         writeFileSync(latestComparisonPath, JSON.stringify(report, null, 2));
     });
+    addOutputDirOption(cmd);
+    return cmd;
 }
 function findLatestBaseline() {
     if (!existsSync(BASELINES_DIR))

package/dist/commands/explain-handler.js CHANGED Viewed

@@ -23,6 +23,7 @@ import { TASK_FILE_NAMES } from "../_vendor/ailf-core/index.js";
 import { buildPipelinePlan, buildSimpleCommandPlan, } from "../pipeline/plan.js";
 import { formatPlanConsole, formatPlanJson } from "../pipeline/plan-format.js";
 import { computeResolvedOptions } from "./pipeline-action.js";
+import { getCallerCwd } from "./shared/resolve-output-dir.js";
 import { LiteracyVariant } from "../pipeline/normalize-mode.js";
 // ---------------------------------------------------------------------------
 // Registry
@@ -43,10 +44,10 @@ const EXPLAIN_REGISTRY = {
     "agent-report": {
         description: "Generate an agent behavior observation report from eval results",
         filesCreated: [
-            "results/latest/agent-report.json",
-            "results/latest/agent-report.md",
+            "<outputDir>/agent-report.json",
+            "<outputDir>/agent-report.md",
         ],
-        filesRead: ["results/latest/eval-results.json"],
+        filesRead: ["<outputDir>/eval-results.json"],
         steps: [
             {
                 cacheStatus: "miss",
@@ -82,9 +83,9 @@ const EXPLAIN_REGISTRY = {
     },
     "calculate-scores": {
         description: "Calculate AI Literacy Scores from Promptfoo evaluation results",
-        filesCreated: ["results/latest/score-summary.json"],
+        filesCreated: ["<outputDir>/score-summary.json"],
         filesRead: [
-            "results/latest/eval-results.json",
+            "<outputDir>/eval-results.json",
             "config/rubrics.ts",
             "config/models.ts",
         ],
@@ -104,23 +105,20 @@ const EXPLAIN_REGISTRY = {
             {
                 cacheStatus: "miss",
                 name: "Write summary",
-                reason: "Persist score-summary.json to results/latest/",
+                reason: "Persist score-summary.json to output directory",
                 willRun: true,
             },
         ],
     },
     compare: {
         description: "Compare current evaluation scores against a saved baseline",
-        filesCreated: ["results/latest/comparison-report.json"],
-        filesRead: [
-            "results/latest/score-summary.json",
-            "results/baselines/*.json",
-        ],
+        filesCreated: ["<outputDir>/comparison-report.json"],
+        filesRead: ["<outputDir>/score-summary.json", "results/baselines/*.json"],
         steps: [
             {
                 cacheStatus: "miss",
                 name: "Load current scores",
-                reason: "Read results/latest/score-summary.json",
+                reason: "Read <outputDir>/score-summary.json",
                 willRun: true,
             },
             {
@@ -181,8 +179,8 @@ const EXPLAIN_REGISTRY = {
     },
     "discovery-report": {
         description: "Generate agent discoverability report from agentic retrieval metrics",
-        filesCreated: ["results/latest/discovery-report.md"],
-        filesRead: ["results/latest/score-summary.json"],
+        filesCreated: ["<outputDir>/discovery-report.md"],
+        filesRead: ["<outputDir>/score-summary.json"],
         steps: [
             {
                 cacheStatus: "miss",
@@ -206,7 +204,7 @@ const EXPLAIN_REGISTRY = {
     },
     eval: {
         description: "Run Promptfoo evaluation directly (passthrough — all flags forwarded to promptfoo)",
-        filesCreated: ["results/latest/eval-results.json"],
+        filesCreated: ["<outputDir>/eval-results.json"],
         filesRead: ["promptfooconfig.yaml"],
         steps: [
             {
@@ -280,7 +278,7 @@ const EXPLAIN_REGISTRY = {
     grader: {
         description: "Grader reliability tools (consistency, compare, sensitivity, validate)",
         filesRead: [
-            "results/latest/eval-results.json",
+            "<outputDir>/eval-results.json",
             "config/rubrics.ts",
             "canonical/reference-solutions/",
         ],
@@ -312,7 +310,7 @@ const EXPLAIN_REGISTRY = {
     },
     "measure-retrieval": {
         description: "Measure Sanity text search retrieval quality against canonical document annotations",
-        filesCreated: ["results/latest/retrieval-metrics.json"],
+        filesCreated: ["<outputDir>/retrieval-metrics.json"],
         filesRead: ["tasks/literacy/*.task.ts"],
         steps: [
             {
@@ -337,7 +335,7 @@ const EXPLAIN_REGISTRY = {
     },
     "pr-comment": {
         description: "Generate a markdown PR comment from evaluation scores for CI posting",
-        filesRead: ["results/latest/score-summary.json"],
+        filesRead: ["<outputDir>/score-summary.json"],
         steps: [
             {
                 cacheStatus: "miss",
@@ -355,7 +353,7 @@ const EXPLAIN_REGISTRY = {
     },
     publish: {
         description: "Publish a local evaluation report to the Sanity Content Lake (standalone)",
-        filesRead: ["results/latest/score-summary.json"],
+        filesRead: ["<outputDir>/score-summary.json"],
         steps: [
             {
                 cacheStatus: "miss",
@@ -386,12 +384,12 @@ const EXPLAIN_REGISTRY = {
     "readiness-report": {
         description: "Generate launch readiness checklist for a feature area with threshold evaluation",
         filesRead: [
-            "results/latest/score-summary.json",
-            "results/latest/gap-analysis.json",
+            "<outputDir>/score-summary.json",
+            "<outputDir>/gap-analysis.json",
             "config/thresholds.ts",
             "results/baselines/",
         ],
-        filesCreated: ["results/latest/readiness-report.md"],
+        filesCreated: ["<outputDir>/readiness-report.md"],
         steps: [
             {
                 cacheStatus: "miss",
@@ -603,7 +601,7 @@ function buildInitExplainPlan(actionCommand, rootDir) {
     const configFile = format === "ts"
         ? "ailf.config.ts"
         : `config.${format === "yaml" ? "yaml" : "json"}`;
-    const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
+    const callerCwd = getCallerCwd();
     const targetDir = opts.path ?? ".";
     const ailfDir = `${targetDir}/.ailf`;
     const tasksDir = `${ailfDir}/tasks`;
@@ -664,7 +662,7 @@ function buildBaselineExplainPlan(actionCommand, rootDir) {
         command: `baseline ${subcommand}`,
         description: descriptions[subcommand] ?? `Baseline operation: ${subcommand}`,
         filesCreated: subcommand === "save" ? ["results/baselines/<timestamp>.json"] : [],
-        filesRead: ["results/latest/score-summary.json", "results/baselines/"],
+        filesRead: ["<outputDir>/score-summary.json", "results/baselines/"],
         rootDir,
     });
 }

package/dist/commands/fetch-docs.js CHANGED Viewed

@@ -11,7 +11,8 @@ import { Command } from "commander";
 import { createAppContext } from "../composition-root.js";
 import { loadSource } from "../sources.js";
 import { configToSourceOverrides } from "../orchestration/config-to-source-overrides.js";
-import { addSanitySourceOptions } from "./shared/options.js";
+import { addOutputDirOption, addSanitySourceOptions } from "./shared/options.js";
+import { resolveOutputDir } from "./shared/resolve-output-dir.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
 export function createFetchDocsCommand() {
@@ -31,6 +32,7 @@ export function createFetchDocsCommand() {
         }
     });
     addSanitySourceOptions(cmd);
+    addOutputDirOption(cmd);
     return cmd;
 }
 // ---------------------------------------------------------------------------
@@ -41,7 +43,7 @@ async function executeFetchDocs(opts) {
     // Build a minimal ResolvedConfig for the composition root
     const ctx = createAppContext({
         rootDir: ROOT,
-        outputDir: resolve(ROOT, "results", "latest"),
+        outputDir: resolveOutputDir(opts.outputDir),
         mode: "literacy",
         noAutoScope: false,
         skipFetch: false,

package/dist/commands/generate-configs.js CHANGED Viewed

@@ -9,17 +9,19 @@ import { fileURLToPath } from "url";
 import { Command } from "commander";
 import { createAppContext } from "../composition-root.js";
 import { GenerateConfigsStep } from "../orchestration/steps/generate-configs-step.js";
+import { addOutputDirOption } from "./shared/options.js";
+import { resolveOutputDir } from "./shared/resolve-output-dir.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
 export function createGenerateConfigsCommand() {
-    return new Command("generate-configs")
+    const cmd = new Command("generate-configs")
         .description("Generate promptfoo config files from config/models.yaml")
         .option("-s, --source <name>", "Documentation source name")
         .action(async (opts) => {
         try {
             const ctx = createAppContext({
                 rootDir: ROOT,
-                outputDir: resolve(ROOT, "results", "latest"),
+                outputDir: resolveOutputDir(opts.outputDir),
                 mode: "literacy",
                 noAutoScope: false,
                 skipFetch: true,
@@ -58,4 +60,6 @@ export function createGenerateConfigsCommand() {
                 console.error(err.message);
         }
     });
+    addOutputDirOption(cmd);
+    return cmd;
 }

package/dist/commands/pipeline-action.js CHANGED Viewed

@@ -21,6 +21,7 @@ import { buildStepSequence } from "../orchestration/build-step-sequence.js";
 import { orchestratePipeline } from "../orchestration/pipeline-orchestrator.js";
 import { load } from "js-yaml";
 import { parseRepoConfig, } from "../adapters/task-sources/repo-schemas.js";
+import { getCallerCwd, resolveOutputDir } from "./shared/resolve-output-dir.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
 // ---------------------------------------------------------------------------
@@ -35,7 +36,7 @@ const VALID_SEARCH_MODES = ["open", "origin-only", "off"];
  */
 export function computeResolvedOptions(opts) {
     // Resolve paths relative to the caller's cwd, not the eval package root
-    const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
+    const callerCwd = getCallerCwd();
     // Validate + normalize mode via the single boundary function.
     // normalizeMode() maps legacy variant names (baseline, agentic, etc.)
     // to canonical mode "literacy" + variant, and throws on invalid input.
@@ -209,23 +210,12 @@ export function computeResolvedOptions(opts) {
     const remote = opts.remote || process.env.AILF_REMOTE === "1";
     const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
     const apiKey = process.env.AILF_API_KEY ?? undefined;
-    // Output directory: explicit flag → repo-task heuristic → default
+    // Output directory: explicit --output-dir → $CWD/.ailf/results/latest/
     const resolvedRepoTasksPath = opts.repoTasksPath
         ? resolve(callerCwd, opts.repoTasksPath)
         : undefined;
     const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
-    let outputDir;
-    if (opts.outputDir) {
-        outputDir = resolve(callerCwd, opts.outputDir);
-    }
-    else if (resolvedTaskSourceType === "repo" || resolvedRepoTasksPath) {
-        outputDir = resolvedRepoTasksPath
-            ? resolve(resolvedRepoTasksPath, "..", "results", "latest")
-            : resolve(callerCwd, ".ailf", "results", "latest");
-    }
-    else {
-        outputDir = resolve(ROOT, "results", "latest");
-    }
+    const outputDir = resolveOutputDir(opts.outputDir);
     return {
         allowedOriginArgs,
         apiKey,
@@ -310,7 +300,7 @@ export async function executePipeline(cliOpts) {
         }
         const { FileConfigAdapter } = await import("../adapters/config-sources/file-config-adapter.js");
         const { createAppContext } = await import("../composition-root.js");
-        const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
+        const callerCwd = getCallerCwd();
         const adapter = new FileConfigAdapter(cliOpts.config, ROOT);
         const config = await adapter.resolve();
         // Merge CLI-only flags that aren't in the config file.
@@ -323,13 +313,8 @@ export async function executePipeline(cliOpts) {
         if (cliOpts.output) {
             config.outputPath = resolve(callerCwd, cliOpts.output);
         }
-        // Output dir: explicit CLI flag → repo-task heuristic → file-config default
-        if (cliOpts.outputDir) {
-            config.outputDir = resolve(callerCwd, cliOpts.outputDir);
-        }
-        else if (config.repoTasksPath) {
-            config.outputDir = resolve(config.repoTasksPath, "..", "results", "latest");
-        }
+        // Output dir: explicit CLI flag → $CWD/.ailf/results/latest/
+        config.outputDir = resolveOutputDir(cliOpts.outputDir);
         // Create AppContext directly from the merged config so adapters
         // (especially taskSource) are wired from the file config's
         // taskSourceType — not from CLI defaults.
@@ -350,8 +335,7 @@ export async function executePipeline(cliOpts) {
     // cache which never contains .ailf/.
     if (o.remote) {
         const { runRemotePipeline } = await import("./remote-pipeline.js");
-        const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
-        await runRemotePipeline(o, callerCwd);
+        await runRemotePipeline(o, getCallerCwd());
         return;
     }
     // Dry-run: validate only, don't execute steps

package/dist/commands/pipeline.js CHANGED Viewed

@@ -55,7 +55,7 @@ export function createPipelineCommand() {
         .option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
         .option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
         .option("--capture", "Enable artifact capture for this run", false)
-        .option("--capture-dir <path>", "Base directory for capture output (default: results/captures/)")
+        .option("--capture-dir <path>", "Base directory for capture output (default: .ailf/results/captures/)")
         .option("--no-capture-compress", "Disable tar.gz compression of captures")
         .option("--no-capture-extras", "Exclude mode-specific artifacts from captures")
         .action(async (opts) => {

package/dist/commands/pr-comment.js CHANGED Viewed

@@ -9,10 +9,12 @@ import { fileURLToPath } from "url";
 import { Command } from "commander";
 import { createAppContext } from "../composition-root.js";
 import { generatePrComment } from "../pipeline/pr-comment.js";
+import { addOutputDirOption } from "./shared/options.js";
+import { resolveOutputDir } from "./shared/resolve-output-dir.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
 export function createPrCommentCommand() {
-    return new Command("pr-comment")
+    const cmd = new Command("pr-comment")
         .description("Generate a markdown PR comment from evaluation scores")
         .option("--output <path>", "Write comment to file (default: stdout)")
         .option("--promptfoo-url <url>", "Promptfoo share URL to include")
@@ -20,7 +22,7 @@ export function createPrCommentCommand() {
         try {
             const ctx = createAppContext({
                 rootDir: ROOT,
-                outputDir: resolve(ROOT, "results", "latest"),
+                outputDir: resolveOutputDir(opts.outputDir),
                 mode: "literacy",
                 noAutoScope: false,
                 skipFetch: true,
@@ -48,4 +50,6 @@ export function createPrCommentCommand() {
                 console.error(err.message);
         }
     });
+    addOutputDirOption(cmd);
+    return cmd;
 }

package/dist/commands/publish.d.ts CHANGED Viewed

@@ -21,6 +21,7 @@
 import { Command } from "commander";
 export interface PublishCommandOptions {
     dryRun: boolean;
+    outputDir?: string;
     tag?: string;
 }
 export declare function createPublishCommand(): Command;

package/dist/commands/publish.js CHANGED Viewed

@@ -23,22 +23,27 @@ import { dirname, resolve } from "path";
 import { fileURLToPath } from "url";
 import { Command } from "commander";
 import { createAppContext } from "../composition-root.js";
+import { addOutputDirOption } from "./shared/options.js";
+import { getCallerCwd, resolveOutputDir } from "./shared/resolve-output-dir.js";
 import { buildProvenance, } from "../pipeline/provenance.js";
 import { generateReportTitle } from "../pipeline/report-title.js";
 import { generateReportId, } from "../report-store.js";
 import { withRetry } from "../sinks/retry.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
-const DEFAULT_SUMMARY_PATH = resolve(ROOT, "results", "latest", "score-summary.json");
 export function createPublishCommand() {
-    return new Command("publish")
+    const cmd = new Command("publish")
         .description("Publish a local evaluation report to the Sanity Content Lake")
-        .argument("[summary-path]", "Path to score-summary.json", DEFAULT_SUMMARY_PATH)
+        .argument("[summary-path]", "Path to score-summary.json")
         .option("-t, --tag <tag>", "Label for the published report")
         .option("-n, --dry-run", "Preview the report without writing to Sanity or sinks", false)
         .action(async (summaryPath, opts) => {
-        await runPublishCommand(summaryPath, opts);
+        const outputDir = resolveOutputDir(opts.outputDir);
+        const effectivePath = summaryPath ?? resolve(outputDir, "score-summary.json");
+        await runPublishCommand(effectivePath, outputDir, opts);
     });
+    addOutputDirOption(cmd);
+    return cmd;
 }
 // ---------------------------------------------------------------------------
 // Provenance builder (from score summary, not full pipeline context)
@@ -77,7 +82,7 @@ function buildProvenanceFromSummary(summary) {
 // ---------------------------------------------------------------------------
 // Command implementation
 // ---------------------------------------------------------------------------
-async function runPublishCommand(summaryPath, opts) {
+async function runPublishCommand(summaryPath, outputDir, opts) {
     // Wire up infrastructure via composition root
     const ctx = createAppContext({
         compareEnabled: false,
@@ -87,7 +92,7 @@ async function runPublishCommand(summaryPath, opts) {
         noAutoScope: false,
         noCache: true,
         noRemoteCache: true,
-        outputDir: resolve(ROOT, "results", "latest"),
+        outputDir,
         publishEnabled: true,
         publishTag: opts.tag,
         readinessEnabled: false,
@@ -106,8 +111,7 @@ async function runPublishCommand(summaryPath, opts) {
     // -----------------------------------------------------------------------
     // 1. Resolve and read the score summary
     // -----------------------------------------------------------------------
-    const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
-    const resolvedPath = resolve(callerCwd, summaryPath);
+    const resolvedPath = resolve(getCallerCwd(), summaryPath);
     if (!existsSync(resolvedPath)) {
         console.error(`  ✖ File not found: ${resolvedPath}`);
         console.error();

package/dist/commands/remote-pipeline.js CHANGED Viewed

@@ -88,7 +88,7 @@ export async function runRemotePipeline(opts, rootDir) {
     }
     // 7. Fetch and write output artifacts
     await writeRemoteResults(client, job, {
-        rootDir,
+        outputDir: opts.outputDir,
         outputPath: opts.outputPath,
         apiUrl: opts.apiUrl,
     });