npm - @sanity/ailf - Versions diffs - 3.7.0 → 3.8.1 - Mend

@sanity/ailf 3.7.0 → 3.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104) hide show

package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
package/config/thresholds.ts +3 -3
package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
package/dist/_vendor/ailf-core/examples/index.js +2 -2
package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
package/dist/_vendor/ailf-shared/run-classification.js +1 -1
package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
package/dist/adapters/api-client/build-request.d.ts +0 -2
package/dist/adapters/api-client/build-request.js +2 -6
package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
package/dist/adapters/config-sources/file-config-adapter.js +42 -17
package/dist/adapters/task-sources/repo-schemas.d.ts +41 -3
package/dist/adapters/task-sources/repo-schemas.js +127 -0
package/dist/cli-program.d.ts +39 -0
package/dist/cli-program.js +137 -0
package/dist/cli.d.ts +8 -2
package/dist/cli.js +128 -142
package/dist/commands/agent-report.js +1 -1
package/dist/commands/calculate-scores.js +0 -2
package/dist/commands/check-staleness.js +1 -1
package/dist/commands/chronic-failures.js +4 -4
package/dist/commands/coverage-audit.js +6 -7
package/dist/commands/discovery-report.js +16 -4
package/dist/commands/eval.d.ts +1 -1
package/dist/commands/eval.js +1 -1
package/dist/commands/explain-handler.d.ts +1 -1
package/dist/commands/explain-handler.js +13 -44
package/dist/commands/fetch-docs.js +0 -2
package/dist/commands/generate-configs.js +0 -2
package/dist/commands/grader/index.js +3 -3
package/dist/commands/init.d.ts +2 -2
package/dist/commands/init.js +10 -9
package/dist/commands/interactive.d.ts +1 -1
package/dist/commands/interactive.js +8 -8
package/dist/commands/pipeline-action.d.ts +1 -3
package/dist/commands/pipeline-action.js +174 -140
package/dist/commands/pr-comment.js +1 -3
package/dist/commands/publish.d.ts +1 -1
package/dist/commands/publish.js +2 -4
package/dist/commands/readiness-report.js +17 -8
package/dist/commands/remote-pipeline.d.ts +1 -1
package/dist/commands/remote-pipeline.js +1 -3
package/dist/commands/run.d.ts +64 -0
package/dist/commands/{pipeline.js → run.js} +19 -30
package/dist/commands/shared/help.js +4 -4
package/dist/commands/shared/options.d.ts +29 -3
package/dist/commands/shared/options.js +37 -13
package/dist/commands/validate-tasks.js +1 -1
package/dist/commands/validate.d.ts +1 -1
package/dist/commands/validate.js +2 -2
package/dist/commands/weekly-digest.js +3 -3
package/dist/config/thresholds.ts +3 -3
package/dist/orchestration/build-app-context.js +0 -2
package/dist/orchestration/build-step-sequence.js +1 -11
package/dist/orchestration/steps/fetch-docs-step.js +1 -1
package/dist/orchestration/steps/index.d.ts +0 -2
package/dist/orchestration/steps/index.js +0 -2
package/dist/orchestration/steps/run-eval-step.js +1 -1
package/dist/pipeline/cache.d.ts +1 -1
package/dist/pipeline/map-request-to-config.js +0 -2
package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
package/dist/pipeline/plan.d.ts +2 -4
package/dist/pipeline/plan.js +4 -32
package/dist/pipeline/run-context.d.ts +1 -1
package/dist/pipeline/run-context.js +4 -4
package/dist/pipeline/validate.d.ts +1 -1
package/dist/pipeline/validate.js +1 -1
package/package.json +11 -9
package/dist/commands/pipeline.d.ts +0 -77
package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
package/dist/orchestration/steps/discovery-report-step.js +0 -62
package/dist/orchestration/steps/readiness-step.d.ts +0 -13
package/dist/orchestration/steps/readiness-step.js +0 -98
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509

package/dist/commands/grader/index.js CHANGED Viewed

@@ -95,16 +95,16 @@ export function createGraderCommand() {
         .command("validate")
         .description("Validate grader accuracy against human reference grades")
         .option("-g, --grader <model>", "Grader model to validate")
-        .option("-t, --threshold <n>", "MAE threshold for pass/fail", parseFloat, 10)
+        .option("--mae-threshold <n>", "MAE threshold for pass/fail", parseFloat, 10)
         .action(async (opts) => {
         try {
             const result = await runGraderValidate({
                 graderModel: opts.grader,
-                maeThreshold: opts.threshold,
+                maeThreshold: opts.maeThreshold,
                 rootDir: ROOT,
             });
             if (!result.passesThreshold) {
-                console.error(`\n  ❌ VALIDATION FAILED: MAE ${result.overallMae} exceeds threshold ${opts.threshold}`);
+                console.error(`\n  ❌ VALIDATION FAILED: MAE ${result.overallMae} exceeds threshold ${opts.maeThreshold}`);
                 process.exit(1);
             }
         }

package/dist/commands/init.d.ts CHANGED Viewed

@@ -11,8 +11,8 @@
  *
  * Usage:
  *   ailf init                        # TypeScript output (default)
- *   ailf init --output-format yaml   # YAML output
- *   ailf init --output-format json   # JSON output
+ *   ailf init --format yaml          # YAML output
+ *   ailf init --format json          # JSON output
  *   ailf init --force                # overwrite existing files
  *   ailf init --path ./my-dir        # target a specific directory
  */

package/dist/commands/init.js CHANGED Viewed

@@ -11,8 +11,8 @@
  *
  * Usage:
  *   ailf init                        # TypeScript output (default)
- *   ailf init --output-format yaml   # YAML output
- *   ailf init --output-format json   # JSON output
+ *   ailf init --format yaml          # YAML output
+ *   ailf init --format json          # JSON output
  *   ailf init --force                # overwrite existing files
  *   ailf init --path ./my-dir        # target a specific directory
  */
@@ -27,7 +27,7 @@ import { probeUserLocalAilf } from "../adapters/config-sources/ailf-resolver.js"
 export function createInitCommand() {
     return new Command("init")
         .description("Initialize a directory for AI Literacy Framework evaluation")
-        .option("--output-format <fmt>", 'Output format for generated files: "ts" (default), "yaml", or "json"', "ts")
+        .option("-f, --format <fmt>", 'Output format for generated files: "ts" (default), "yaml", or "json"', "ts")
         .option("--force", "Overwrite existing files", false)
         .option("--path <dir>", "Target directory (default: current directory)", ".")
         .option("--mode <mode>", "Scaffold for a specific mode: literacy, mcp-server, custom (default: all modes)")
@@ -63,15 +63,15 @@ function taskStemsForMode(mode) {
 // ---------------------------------------------------------------------------
 async function runInit(opts) {
     const validFormats = new Set(["ts", "yaml", "json"]);
-    if (!validFormats.has(opts.outputFormat)) {
-        console.error(`  ✗ Invalid output format "${opts.outputFormat}". Valid options: ts, yaml, json`);
+    if (!validFormats.has(opts.format)) {
+        console.error(`  ✗ Invalid output format "${opts.format}". Valid options: ts, yaml, json`);
         process.exitCode = 1;
         return;
     }
-    const format = opts.outputFormat;
+    const format = opts.format;
     const force = opts.force;
     if (format === "yaml") {
-        console.warn("  ⚠ --output-format yaml is deprecated. TypeScript (default) is the\n" +
+        console.warn("  ⚠ --format yaml is deprecated. TypeScript (default) is the\n" +
             "    recommended format — it provides full IDE autocomplete via defineTask().\n" +
             "    YAML output will be removed in a future release.\n");
     }
@@ -285,10 +285,11 @@ async function runInit(opts) {
     console.log("     AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
     console.log();
     console.log("  💡 Or test a remote run against your repo tasks:");
-    console.log("     AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --task-source=repo --debug");
+    console.log("     # First, set `taskSource: { type: repo }` in .ailf/config.yaml");
+    console.log("     AILF_API_KEY=... npx @sanity/ailf@latest run --remote --debug");
     console.log();
     console.log("  💡 Or run locally against your repo tasks:");
-    console.log("     AILF_API_KEY=... npx @sanity/ailf@latest pipeline --mode=literacy --variant=full --task-source=repo --debug --explain -y");
+    console.log("     AILF_API_KEY=... npx @sanity/ailf@latest run --mode=literacy --variant=full --debug --explain -y");
     console.log();
 }
 // ---------------------------------------------------------------------------

package/dist/commands/interactive.d.ts CHANGED Viewed

@@ -3,7 +3,7 @@
  *
  * When `ailf` is run with no arguments (or `ailf interactive`), this module
  * prompts the user through mode selection, area scoping, debug options,
- * and common flags — then builds and executes the equivalent `ailf pipeline`
+ * and common flags — then builds and executes the equivalent `ailf run`
  * command.
  *
  * Uses @inquirer/prompts for a clean, modern terminal UI.

package/dist/commands/interactive.js CHANGED Viewed

@@ -3,7 +3,7 @@
  *
  * When `ailf` is run with no arguments (or `ailf interactive`), this module
  * prompts the user through mode selection, area scoping, debug options,
- * and common flags — then builds and executes the equivalent `ailf pipeline`
+ * and common flags — then builds and executes the equivalent `ailf run`
  * command.
  *
  * Uses @inquirer/prompts for a clean, modern terminal UI.
@@ -52,9 +52,9 @@ async function runInteractiveWizard() {
     const workflow = await select({
         choices: [
             {
-                description: "Full evaluation pipeline (fetch → eval → score → report)",
-                name: "Run pipeline",
-                value: "pipeline",
+                description: "Full evaluation run (fetch → eval → score → report)",
+                name: "Run evaluation",
+                value: "run",
             },
             {
                 description: "Compare current scores against a saved baseline",
@@ -193,21 +193,21 @@ async function runInteractiveWizard() {
         });
         if (debugStyle === "first-n") {
             const n = await input({ default: "5", message: "Number of tests:" });
-            args.push("--debug-n", n);
+            args.push("--filter-first-n", n);
         }
         else if (debugStyle === "sample") {
             const n = await input({
                 default: "3",
                 message: "Sample size:",
             });
-            args.push("--debug-sample", n);
+            args.push("--filter-sample", n);
         }
         else if (debugStyle === "pattern") {
             const pattern = await input({
                 message: "Description regex (e.g. Blog, webhook):",
             });
             if (pattern.trim()) {
-                args.push("--debug-pattern", pattern.trim());
+                args.push("--filter-pattern", pattern.trim());
             }
         }
     }
@@ -238,5 +238,5 @@ async function runInteractiveWizard() {
             args.push("--explain", "--yes");
         }
     }
-    return { args, command: "pipeline" };
+    return { args, command: "run" };
 }

package/dist/commands/pipeline-action.d.ts CHANGED Viewed

@@ -12,7 +12,7 @@
  */
 import { type ImpactSummary } from "../pipeline/reverse-mapping.js";
 import type { DebugOptions, EvalMode } from "../pipeline/types.js";
-import type { PipelineCliOptions } from "./pipeline.js";
+import type { PipelineCliOptions } from "./run.js";
 export interface ResolvedOptions {
     allowedOriginArgs: string[];
     areaOption?: string;
@@ -24,7 +24,6 @@ export interface ResolvedOptions {
     concurrency?: number;
     datasetOverride?: string;
     debug?: DebugOptions;
-    discoveryReportEnabled: boolean;
     dryRun: boolean;
     gapAnalysisEnabled: boolean;
     graderReplications?: number;
@@ -46,7 +45,6 @@ export interface ResolvedOptions {
     /** True when --publish or --no-publish was explicitly passed by the user. */
     publishExplicit: boolean;
     publishTag?: string;
-    readinessEnabled: boolean;
     reportDataset?: string;
     reportProjectId?: string;
     sanityDocumentArgs: string[];

package/dist/commands/pipeline-action.js CHANGED Viewed

@@ -37,6 +37,10 @@ const VALID_SEARCH_MODES = ["open", "origin-only", "off"];
 export function computeResolvedOptions(opts) {
     // Resolve paths relative to the caller's cwd, not the eval package root
     const callerCwd = getCallerCwd();
+    // `.ailf/config.yaml` is the per-environment config home for `ailf run`
+    // (W0077 Phase 6a). Load early so downstream cascades (source, agentic,
+    // owner, output, etc.) can read from it.
+    const repoConfig = loadRepoConfigIfPresent(callerCwd);
     // Validate + normalize mode via the single boundary function.
     // normalizeMode() maps legacy variant names (baseline, agentic, etc.)
     // to canonical mode "literacy" + variant, and throws on invalid input.
@@ -59,34 +63,34 @@ export function computeResolvedOptions(opts) {
         console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);
         process.exit(1);
     }
-    // Debug options — any sub-flag (--debug-n, --debug-pattern, --debug-sample)
-    // implies --debug, so users don't need to pass both.
-    // When DEBUG_EVAL is explicitly "0", ignore the sub-flags from env.
-    // CLI flags (--debug-n, --debug-pattern, --debug-sample) always win.
+    // Debug + filter options — any filter flag (--filter-first-n,
+    // --filter-pattern, --filter-sample) implies --debug, so users don't
+    // need to pass both. When DEBUG_EVAL is explicitly "0", ignore the env
+    // vars. CLI flags always win over env vars.
     const debugEnvDisabled = process.env.DEBUG_EVAL === "0";
-    const debugN = opts.debugN ??
+    const filterFirstN = opts.filterFirstN ??
         (process.env.DEBUG_EVAL_N && !debugEnvDisabled
             ? parseInt(process.env.DEBUG_EVAL_N, 10)
             : undefined);
-    const debugPattern = opts.debugPattern ??
+    const filterPattern = opts.filterPattern ??
         (process.env.DEBUG_EVAL_PATTERN && !debugEnvDisabled
             ? process.env.DEBUG_EVAL_PATTERN
             : undefined);
-    const debugSample = opts.debugSample ??
+    const filterSample = opts.filterSample ??
         (process.env.DEBUG_EVAL_SAMPLE && !debugEnvDisabled
             ? parseInt(process.env.DEBUG_EVAL_SAMPLE, 10)
             : undefined);
     const debugEnabled = opts.debug ||
         process.env.DEBUG_EVAL === "1" ||
-        debugN !== undefined ||
-        debugPattern !== undefined ||
-        debugSample !== undefined;
+        filterFirstN !== undefined ||
+        filterPattern !== undefined ||
+        filterSample !== undefined;
     const debug = debugEnabled
         ? {
             enabled: true,
-            firstN: debugN,
-            pattern: debugPattern,
-            sample: debugSample,
+            firstN: filterFirstN,
+            pattern: filterPattern,
+            sample: filterSample,
         }
         : undefined;
     // Search mode validation
@@ -95,16 +99,29 @@ export function computeResolvedOptions(opts) {
         console.error(`❌ Invalid --search mode "${searchMode}". Must be one of: ${VALID_SEARCH_MODES.join(", ")}`);
         process.exit(1);
     }
-    // Merge repeatable args (singular + plural aliases)
-    const urlArgs = [...opts.url, ...opts.urls];
-    const headerArgs = [...opts.header, ...opts.headers];
-    const allowedOriginArgs = [...opts.allowedOrigin, ...opts.allowedOrigins];
-    const sanityDocumentArgs = [...opts.sanityDocument, ...opts.sanityDocuments];
-    // Source overrides
-    const datasetOverride = opts.sanityDataset;
-    const projectIdOverride = opts.sanityProject;
+    // Merge repeatable args (singular + plural aliases). `headerArgs` and
+    // `allowedOriginArgs` are populated from `.ailf/config.yaml`'s `agentic`
+    // block (W0077 Phase 6f); the CLI flags `--header` and `--allowed-origin`
+    // were retired. The URL-classification block below may still append a
+    // host to `allowedOriginArgs` when neither config nor CLI provided one.
+    // The `DOC_HEADERS` and `DOC_ALLOWED_ORIGIN(S)` env vars still merge in
+    // sources.ts at the doc-fetch boundary (additive, unchanged).
+    const urlArgs = opts.url;
+    const headerArgs = [];
+    const allowedOriginArgs = [];
+    const sanityDocumentArgs = opts.sanityDocument;
+    if (repoConfig?.agentic?.headers) {
+        for (const [key, value] of Object.entries(repoConfig.agentic.headers)) {
+            headerArgs.push(`${key}: ${value}`);
+        }
+    }
+    if (repoConfig?.agentic?.allowedOrigins) {
+        allowedOriginArgs.push(...repoConfig.agentic.allowedOrigins);
+    }
+    // Source overrides — perspective stays per-run (CLI flag), the dataset /
+    // project / studio-origin trio moved into `.ailf/config.yaml`'s `source`
+    // block in W0077 Phase 6d. Env vars still win over the config-file value.
     const perspectiveOverride = opts.sanityPerspective;
-    const studioOriginOverride = opts.sanityStudioOrigin;
     // URL classification (pure computation — results captured, not applied to env)
     if (urlArgs.length > 0) {
         const classification = classifyUrls(urlArgs);
@@ -115,21 +132,6 @@ export function computeResolvedOptions(opts) {
             sanityDocumentArgs.push(...merged);
         }
     }
-    // Validate custom headers (early error)
-    if (headerArgs.length > 0) {
-        for (const h of headerArgs) {
-            const colonIdx = h.indexOf(":");
-            if (colonIdx === -1) {
-                console.error(`❌ Invalid header format: "${h}". Expected "Key: Value".`);
-                process.exit(1);
-            }
-            const key = h.slice(0, colonIdx).trim();
-            if (!key) {
-                console.error(`❌ Invalid header: empty key in "${h}"`);
-                process.exit(1);
-            }
-        }
-    }
     // Auto-infer allowed origin from --url
     if (urlArgs.length > 0 && allowedOriginArgs.length === 0) {
         try {
@@ -170,22 +172,48 @@ export function computeResolvedOptions(opts) {
             }
         }
     }
-    // Comparison: --before auto-enables --compare
-    const beforeOption = opts.before;
-    const compareEnabled = opts.compare || beforeOption !== undefined;
-    // Publish: smart default — auto-publish full runs when report store is configured
+    // Comparison: --before-source auto-enables --compare. The `--compare` flag
+    // is a Commander optional-argument: undefined when not passed, `true` for
+    // the bare flag (compare against latest), and a string path when the user
+    // pinned a specific baseline (`--compare path/to/baseline.json`).
+    const beforeOption = opts.beforeSource;
+    const compareEnabled = (opts.compare !== undefined && opts.compare !== false) ||
+        beforeOption !== undefined;
+    const compareBaseline = typeof opts.compare === "string" ? opts.compare : undefined;
+    // Task-source resolution (W0077 Phase 6h) — `--task-source` and
+    // `--repo-tasks-path` retired. Both move under `taskSource: {...}` in
+    // `.ailf/config.yaml`. Cascade: config → built-in default (content-lake).
+    // When type is `repo` and no path is set, fall back to `<cwd>/.ailf/tasks/`
+    // (the location `ailf init` scaffolds).
+    const resolvedTaskSourceType = resolveTaskSourceType(repoConfig?.taskSource?.type);
+    const resolvedRepoTasksPath = resolveRepoTasksPath(callerCwd, repoConfig?.taskSource?.repoTasksPath, resolvedTaskSourceType);
+    // Source overrides (W0077 Phase 6d) — `--sanity-dataset`, `--sanity-project`,
+    // and `--sanity-studio-origin` were retired from `ailf run`. Cascade is now:
+    //   env var > .ailf/config.yaml `source.*` > built-in default (in sources.ts).
+    const datasetOverride = process.env.SANITY_DATASET ?? repoConfig?.source?.dataset;
+    const projectIdOverride = process.env.SANITY_PROJECT_ID ?? repoConfig?.source?.projectId;
+    const studioOriginOverride = process.env.SANITY_STUDIO_ORIGIN ?? repoConfig?.source?.studioOrigin;
+    // Report store overrides (W0077 Phase 6e — `--report-dataset` and
+    // `--report-project` retired). Resolution order:
+    //   1. Environment variables (AILF_REPORT_DATASET, AILF_REPORT_PROJECT_ID)
+    //   2. .ailf/config.yaml reportStore block
+    //   3. Eval dataset override (so perspective evals publish to the same dataset)
+    const reportDataset = process.env.AILF_REPORT_DATASET ??
+        repoConfig?.reportStore?.dataset ??
+        datasetOverride ??
+        undefined;
+    const reportProjectId = process.env.AILF_REPORT_PROJECT_ID ??
+        repoConfig?.reportStore?.projectId ??
+        undefined;
+    // Publish polarity (W0077 Phase 4) — auto policy lives in
+    // .ailf/config.yaml's `publish.auto` (or env / default). CLI flags and
+    // AILF_PUBLISH still override the policy.
     const reportStoreToken = process.env.AILF_REPORT_SANITY_API_TOKEN ?? process.env.SANITY_API_TOKEN;
     const reportStoreConfigured = Boolean(reportStoreToken);
-    // Track whether the user explicitly chose --publish or --no-publish.
-    // In remote mode, when this is false we omit the field from the API
-    // request so the server can apply its own default (publish when jobId
-    // is present). Without this, the local smart-default (which checks for
-    // a local Sanity token the CLI doesn't have) would send publish:false
-    // and suppress server-side report publishing.
     const publishExplicit = opts.publish !== undefined || process.env.AILF_PUBLISH !== undefined;
+    const publishAuto = resolvePublishAuto(repoConfig?.publish?.auto);
     let publishEnabled;
     if (opts.publish !== undefined) {
-        // Explicit --publish or --no-publish always wins
         publishEnabled = opts.publish;
     }
     else if (process.env.AILF_PUBLISH === "1") {
@@ -195,36 +223,40 @@ export function computeResolvedOptions(opts) {
         publishEnabled = false;
     }
     else {
-        // Smart default: full runs auto-publish when store is configured
-        publishEnabled = reportStoreConfigured && !debugEnabled;
+        // Apply the auto policy. The report store still has to be configured
+        // for `auto: "always"` and `"full-runs"` — without a token, publishing
+        // is impossible regardless of policy.
+        switch (publishAuto) {
+            case "always":
+                publishEnabled = reportStoreConfigured;
+                break;
+            case "never":
+                publishEnabled = false;
+                break;
+            case "full-runs":
+            default:
+                publishEnabled = reportStoreConfigured && !debugEnabled;
+                break;
+        }
     }
-    // Resolve task source + repo tasks path before anything that depends on
-    // them (report store overrides, output dir). When --task-source=repo is
-    // set without --repo-tasks-path, default to ./.ailf/tasks/ — the location
-    // created by `ailf init`.
-    const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
-    const resolvedRepoTasksPath = resolveRepoTasksPath(callerCwd, opts.repoTasksPath, resolvedTaskSourceType);
-    // Report store overrides — resolution order:
-    //   1. Explicit CLI flags (--report-dataset, --report-project)
-    //   2. Environment variables (AILF_REPORT_DATASET, AILF_REPORT_PROJECT_ID)
-    //   3. .ailf/config.yaml reportStore block (when repo tasks path is set)
-    //   4. Eval dataset override (so perspective evals publish to the same dataset)
-    const repoConfig = loadRepoConfigIfPresent(resolvedRepoTasksPath);
-    const reportDataset = opts.reportDataset ??
-        process.env.AILF_REPORT_DATASET ??
-        repoConfig?.reportStore?.dataset ??
-        datasetOverride ??
-        undefined;
-    const reportProjectId = opts.reportProject ??
-        process.env.AILF_REPORT_PROJECT_ID ??
-        repoConfig?.reportStore?.projectId ??
-        undefined;
+    // Tag default cascade: --publish-tag > AILF_PUBLISH_TAG > .ailf/config.yaml
+    const publishTag = opts.publishTag ?? process.env.AILF_PUBLISH_TAG ?? repoConfig?.publish?.tag;
+    // Execution-tier resolution (W0077 Phase 6b) — concurrency, grader
+    // replications, gap analysis, and api URL all moved from CLI flags to
+    // `.ailf/config.yaml`'s `execution` block. Cascade for each:
+    //   env var (where one exists) > .ailf/config.yaml > built-in default
+    const concurrency = repoConfig?.execution?.concurrency;
+    const graderReplications = repoConfig?.execution?.graderReplications;
+    const gapAnalysisEnabled = repoConfig?.execution?.gapAnalysis ?? true;
     // Remote mode
     const remote = opts.remote || process.env.AILF_REMOTE === "1";
-    const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
+    const apiUrl = process.env.AILF_API_URL ??
+        repoConfig?.execution?.apiUrl ??
+        "https://ailf-api.sanity.build";
     const apiKey = process.env.AILF_API_KEY ?? undefined;
-    // Output directory: explicit --output-dir → $CWD/.ailf/results/latest/
-    const outputDir = resolveOutputDir(opts.outputDir);
+    // Output directory (W0077 Phase 6c) — `output.dir` from .ailf/config.yaml
+    // when set, otherwise <cwd>/.ailf/results/latest/.
+    const outputDir = resolveOutputDir(repoConfig?.output?.dir);
     return {
         allowedOriginArgs,
         apiKey,
@@ -232,16 +264,15 @@ export function computeResolvedOptions(opts) {
         areaOption,
         beforeOption,
         changedDocsOption,
-        compareBaseline: opts.compareBaseline,
+        compareBaseline,
         compareEnabled,
         compareThreshold: opts.threshold,
-        concurrency: opts.concurrency,
+        concurrency,
         datasetOverride,
         debug,
-        discoveryReportEnabled: opts.discoveryReport,
         dryRun: opts.dryRun,
-        gapAnalysisEnabled: opts.gapAnalysis,
-        graderReplications: opts.graderReplications,
+        gapAnalysisEnabled,
+        graderReplications,
         headerArgs,
         impactSummary,
         mode,
@@ -256,15 +287,14 @@ export function computeResolvedOptions(opts) {
         promptfooUrl: opts.promptfooUrl,
         publishEnabled,
         publishExplicit,
-        publishTag: opts.publishTag,
-        readinessEnabled: opts.readiness,
+        publishTag,
         remote,
         reportDataset,
         reportProjectId,
         sanityDocumentArgs,
         searchMode,
-        skipEval: opts.skipEval,
-        skipFetch: opts.skipFetch,
+        skipEval: opts.eval === false,
+        skipFetch: opts.fetch === false,
         source: opts.source,
         studioOriginOverride,
         repoTasksPath: resolvedRepoTasksPath,
@@ -272,37 +302,55 @@ export function computeResolvedOptions(opts) {
         tagOption,
         taskSourceType: resolvedTaskSourceType,
         urlArgs,
-        artifactsDisabled: opts.artifacts === false,
-        artifactsDir: resolveArtifactsDir(opts),
-        artifactsDryRun: opts.artifactsDryRun,
-        artifactsExclude: parseArtifactsExcludeList(opts.artifactsExclude),
+        // Artifact-writer settings (W0077 Phase 6g) — `--no-artifacts`,
+        // `--artifacts-dir`, and `--artifacts-exclude` retired. Cascade:
+        //   AILF_ARTIFACTS_DIR > .ailf/config.yaml `artifacts.dir` > default
+        //   .ailf/config.yaml `artifacts.enabled: false` > writers attached
+        //   .ailf/config.yaml `artifacts.exclude` > no exclusions
+        // `--no-artifacts-write` (artifactsDryRun) stays per-run.
+        artifactsDisabled: repoConfig?.artifacts?.enabled === false,
+        artifactsDir: process.env.AILF_ARTIFACTS_DIR ?? repoConfig?.artifacts?.dir,
+        artifactsDryRun: opts.artifactsWrite === false,
+        artifactsExclude: repoConfig?.artifacts?.exclude,
         classificationOption: opts.classification?.trim() || undefined,
-        ownerTeamOption: opts.ownerTeam?.trim() || undefined,
-        ownerIndividualOption: opts.ownerIndividual?.trim() || undefined,
+        // Owner attribution (W0077 Phase 6f) — `--owner-team` and
+        // `--owner-individual` retired. Cascade: AILF_OWNER_TEAM /
+        // AILF_OWNER_INDIVIDUAL env vars > .ailf/config.yaml `owner.*` > undefined.
+        // Downstream resolution in build-request.ts already honors the env var as a
+        // fallback when this option is unset, but threading it through here keeps
+        // the cascade order explicit.
+        ownerTeamOption: process.env.AILF_OWNER_TEAM?.trim() ||
+            repoConfig?.owner?.team ||
+            undefined,
+        ownerIndividualOption: process.env.AILF_OWNER_INDIVIDUAL?.trim() ||
+            repoConfig?.owner?.individual ||
+            undefined,
         purposeOption: opts.purpose?.trim() || undefined,
         labelOptions: opts.label ?? [],
     };
 }
+const PUBLISH_AUTO_VALUES = ["always", "full-runs", "never"];
 /**
- * Resolve the artifacts output directory from CLI flags and env vars.
- * Precedence (highest first):
- *   1. `--artifacts-dir` flag
- *   2. `AILF_ARTIFACTS_DIR` env var
+ * Resolve the publish auto policy. Precedence:
+ *   1. .ailf/config.yaml `publish.auto`
+ *   2. AILF_PUBLISH_AUTO env var
+ *   3. Default: "full-runs" (preserves the historical smart default)
  *
- * The `--capture-dir` / `AILF_CAPTURE_DIR` aliases were retired in W0052;
- * callers of those names are rejected at CLI entry (see cli.ts).
+ * Unrecognized env-var values fall through to the default with a warning;
+ * the schema validates the config-file value at parse time.
  */
-function resolveArtifactsDir(opts) {
-    return opts.artifactsDir ?? process.env.AILF_ARTIFACTS_DIR;
-}
-function parseArtifactsExcludeList(raw) {
-    if (!raw)
-        return undefined;
-    const list = raw
-        .split(",")
-        .map((s) => s.trim())
-        .filter(Boolean);
-    return list.length > 0 ? list : undefined;
+function resolvePublishAuto(repoValue) {
+    if (repoValue)
+        return repoValue;
+    const envValue = process.env.AILF_PUBLISH_AUTO?.trim();
+    if (envValue &&
+        PUBLISH_AUTO_VALUES.includes(envValue)) {
+        return envValue;
+    }
+    if (envValue) {
+        console.warn(`⚠️  AILF_PUBLISH_AUTO="${envValue}" is not recognized; valid values are ${PUBLISH_AUTO_VALUES.join(", ")}. Falling back to "full-runs".`);
+    }
+    return "full-runs";
 }
 /** Resolve and validate the --task-source flag value. */
 function resolveTaskSourceType(raw) {
@@ -370,18 +418,11 @@ export async function executePipeline(cliOpts) {
         const callerCwd = getCallerCwd();
         const adapter = new FileConfigAdapter(cliOpts.config, ROOT);
         const config = await adapter.resolve();
-        // Merge CLI-only flags that aren't in the config file.
-        // The file config (from the API payload) has taskSourceType and other
-        // pipeline options, but CLI-only args like --repo-tasks-path and
-        // --output are only available from the command line.
-        if (cliOpts.repoTasksPath) {
-            config.repoTasksPath = resolve(callerCwd, cliOpts.repoTasksPath);
-        }
-        else if (config.taskSourceType === "repo" && !config.repoTasksPath) {
-            // Default: when taskSource=repo but no path set, look in .ailf/tasks/
-            // (matches the `ailf init` scaffold location). Silent fallback here —
-            // composition root will surface a helpful error if the directory is
-            // missing.
+        // When `taskSource.type` is `repo` and no `repoTasksPath` was set in
+        // the config file, fall back to `<callerCwd>/.ailf/tasks/` (the
+        // location `ailf init` scaffolds). Silent fallback — composition-root
+        // surfaces a helpful error if the directory is missing.
+        if (config.taskSourceType === "repo" && !config.repoTasksPath) {
             const defaultPath = resolve(callerCwd, ".ailf", "tasks");
             if (existsSync(defaultPath)) {
                 config.repoTasksPath = defaultPath;
@@ -390,18 +431,13 @@ export async function executePipeline(cliOpts) {
         if (cliOpts.output) {
             config.outputPath = resolve(callerCwd, cliOpts.output);
         }
-        // Output dir: explicit CLI flag → $CWD/.ailf/results/latest/
-        config.outputDir = resolveOutputDir(cliOpts.outputDir);
-        // Artifact options — CLI flags and env vars aren't in the config file,
-        // so merge them here (same logic as resolveOptions).
-        const resolvedArtifactsDir = resolveArtifactsDir(cliOpts);
-        config.artifactsDisabled ??= cliOpts.artifacts === false;
-        config.artifactsDir ??= resolvedArtifactsDir;
-        config.artifactsDryRun ??= cliOpts.artifactsDryRun;
-        const excludeList = parseArtifactsExcludeList(cliOpts.artifactsExclude);
-        if (excludeList) {
-            config.artifactsExclude = excludeList;
-        }
+        // Artifact-writer env-var fallbacks. The adapter populates the bulk of
+        // the artifact settings from `EvalConfigSchema.artifacts.*` (W0077
+        // Phase 6g); we layer the env-var fallbacks here for fields the schema
+        // doesn't cover (GCS bucket, upload mode), and the AILF_ARTIFACTS_DIR
+        // override that wins over both schema and CLI.
+        config.artifactsDir = process.env.AILF_ARTIFACTS_DIR ?? config.artifactsDir;
+        config.artifactsDryRun ??= cliOpts.artifactsWrite === false;
         config.artifactGcsBucket ??= process.env.AILF_GCS_ARTIFACT_BUCKET;
         config.artifactUpload ??= parseArtifactUploadEnv(process.env.AILF_ARTIFACT_UPLOAD);
         // Create AppContext directly from the merged config so adapters
@@ -470,18 +506,16 @@ function writePipelineResult(result, outputDir) {
     console.log(`  📄 Pipeline result: ${resultFile}\n`);
 }
 /**
- * Load .ailf/config.yaml if --repo-tasks-path is set and the config file
- * exists. Returns null if not applicable.
+ * Load `<cwd>/.ailf/config.yaml` if it exists. Returns null when the file
+ * is absent or unparseable.
  *
- * The config.yaml lives one level up from the tasks/ directory:
- *   .ailf/config.yaml  ← config
- *   .ailf/tasks/       ← repoTasksPath
+ * Auto-loads regardless of `--task-source`: the same `.ailf/config.yaml` is
+ * the per-environment configuration home for every run (W0077 Phase 6a).
+ * Subsequent flag-family migrations (6b–6h) read additional fields from
+ * this same file via the same loader.
  */
-function loadRepoConfigIfPresent(repoTasksPath) {
-    if (!repoTasksPath)
-        return null;
-    // .ailf/tasks/ → .ailf/config.yaml
-    const configPath = resolve(repoTasksPath, "..", "config.yaml");
+function loadRepoConfigIfPresent(cwd) {
+    const configPath = resolve(cwd, ".ailf", "config.yaml");
     if (!existsSync(configPath))
         return null;
     try {

package/dist/commands/pr-comment.js CHANGED Viewed

@@ -16,7 +16,7 @@ const ROOT = resolve(__dirname, "..", "..");
 export function createPrCommentCommand() {
     const cmd = new Command("pr-comment")
         .description("Generate a markdown PR comment from evaluation scores")
-        .option("--output <path>", "Write comment to file (default: stdout)")
+        .option("-o, --output <path>", "Write comment to file (default: stdout)")
         .option("--promptfoo-url <url>", "Promptfoo share URL to include")
         .action(async (opts) => {
         try {
@@ -29,8 +29,6 @@ export function createPrCommentCommand() {
                 skipEval: true,
                 compareEnabled: false,
                 gapAnalysisEnabled: false,
-                readinessEnabled: false,
-                discoveryReportEnabled: false,
                 publishEnabled: false,
                 noCache: true,
                 noRemoteCache: true,