npm - @sanity/ailf - Versions diffs - 3.7.0 → 3.8.1 - Mend

@sanity/ailf 3.7.0 → 3.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104)

package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
package/config/thresholds.ts +3 -3
package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
package/dist/_vendor/ailf-core/examples/index.js +2 -2
package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
package/dist/_vendor/ailf-shared/run-classification.js +1 -1
package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
package/dist/adapters/api-client/build-request.d.ts +0 -2
package/dist/adapters/api-client/build-request.js +2 -6
package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
package/dist/adapters/config-sources/file-config-adapter.js +42 -17
package/dist/adapters/task-sources/repo-schemas.d.ts +41 -3
package/dist/adapters/task-sources/repo-schemas.js +127 -0
package/dist/cli-program.d.ts +39 -0
package/dist/cli-program.js +137 -0
package/dist/cli.d.ts +8 -2
package/dist/cli.js +128 -142
package/dist/commands/agent-report.js +1 -1
package/dist/commands/calculate-scores.js +0 -2
package/dist/commands/check-staleness.js +1 -1
package/dist/commands/chronic-failures.js +4 -4
package/dist/commands/coverage-audit.js +6 -7
package/dist/commands/discovery-report.js +16 -4
package/dist/commands/eval.d.ts +1 -1
package/dist/commands/eval.js +1 -1
package/dist/commands/explain-handler.d.ts +1 -1
package/dist/commands/explain-handler.js +13 -44
package/dist/commands/fetch-docs.js +0 -2
package/dist/commands/generate-configs.js +0 -2
package/dist/commands/grader/index.js +3 -3
package/dist/commands/init.d.ts +2 -2
package/dist/commands/init.js +10 -9
package/dist/commands/interactive.d.ts +1 -1
package/dist/commands/interactive.js +8 -8
package/dist/commands/pipeline-action.d.ts +1 -3
package/dist/commands/pipeline-action.js +174 -140
package/dist/commands/pr-comment.js +1 -3
package/dist/commands/publish.d.ts +1 -1
package/dist/commands/publish.js +2 -4
package/dist/commands/readiness-report.js +17 -8
package/dist/commands/remote-pipeline.d.ts +1 -1
package/dist/commands/remote-pipeline.js +1 -3
package/dist/commands/run.d.ts +64 -0
package/dist/commands/{pipeline.js → run.js} +19 -30
package/dist/commands/shared/help.js +4 -4
package/dist/commands/shared/options.d.ts +29 -3
package/dist/commands/shared/options.js +37 -13
package/dist/commands/validate-tasks.js +1 -1
package/dist/commands/validate.d.ts +1 -1
package/dist/commands/validate.js +2 -2
package/dist/commands/weekly-digest.js +3 -3
package/dist/config/thresholds.ts +3 -3
package/dist/orchestration/build-app-context.js +0 -2
package/dist/orchestration/build-step-sequence.js +1 -11
package/dist/orchestration/steps/fetch-docs-step.js +1 -1
package/dist/orchestration/steps/index.d.ts +0 -2
package/dist/orchestration/steps/index.js +0 -2
package/dist/orchestration/steps/run-eval-step.js +1 -1
package/dist/pipeline/cache.d.ts +1 -1
package/dist/pipeline/map-request-to-config.js +0 -2
package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
package/dist/pipeline/plan.d.ts +2 -4
package/dist/pipeline/plan.js +4 -32
package/dist/pipeline/run-context.d.ts +1 -1
package/dist/pipeline/run-context.js +4 -4
package/dist/pipeline/validate.d.ts +1 -1
package/dist/pipeline/validate.js +1 -1
package/package.json +11 -9
package/dist/commands/pipeline.d.ts +0 -77
package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
package/dist/orchestration/steps/discovery-report-step.js +0 -62
package/dist/orchestration/steps/readiness-step.d.ts +0 -13
package/dist/orchestration/steps/readiness-step.js +0 -98
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509

package/dist/commands/publish.d.ts CHANGED Viewed

@@ -4,7 +4,7 @@
  *
  * Reads a score-summary.json (defaulting to results/latest/score-summary.json),
  * builds provenance, writes the report to Sanity, and fans out to configured
- * sinks — exactly the same as the publish step in `ailf pipeline`, but
+ * sinks — exactly the same as the publish step in `ailf run`, but
  * standalone.
  *
  * Uses createAppContext() (composition root) for all infrastructure access.

package/dist/commands/publish.js CHANGED Viewed

@@ -4,7 +4,7 @@
  *
  * Reads a score-summary.json (defaulting to results/latest/score-summary.json),
  * builds provenance, writes the report to Sanity, and fans out to configured
- * sinks — exactly the same as the publish step in `ailf pipeline`, but
+ * sinks — exactly the same as the publish step in `ailf run`, but
  * standalone.
  *
  * Uses createAppContext() (composition root) for all infrastructure access.
@@ -88,7 +88,6 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
     // Wire up infrastructure via composition root
     const ctx = createAppContext({
         compareEnabled: false,
-        discoveryReportEnabled: false,
         gapAnalysisEnabled: false,
         mode: "literacy",
         noAutoScope: false,
@@ -97,7 +96,6 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
         outputDir,
         publishEnabled: true,
         publishTag: opts.tag,
-        readinessEnabled: false,
         rootDir: ROOT,
         searchMode: "open",
         skipEval: true,
@@ -117,7 +115,7 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
     if (!existsSync(resolvedPath)) {
         console.error(`  ✖ File not found: ${resolvedPath}`);
         console.error();
-        console.error("  Hint: Run `ailf pipeline` first to generate results,");
+        console.error("  Hint: Run `ailf run` first to generate results,");
         console.error("  or provide a path to an existing score-summary.json.");
         process.exit(1);
     }

package/dist/commands/readiness-report.js CHANGED Viewed

@@ -15,23 +15,32 @@ import { formatReadinessMarkdown, generateReadinessReport, } from "../pipeline/r
 import { ThresholdConfigSchema, } from "../pipeline/schemas.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
-const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
-const GAP_ANALYSIS_PATH = join(ROOT, "results", "latest", "gap-analysis.json");
+const DEFAULT_RESULTS_DIR = join(ROOT, "results", "latest");
 // thresholds loaded via loadConfigFile below
 const BASELINES_DIR = join(ROOT, "results", "baselines");
+/** Resolve `--from-run` to an absolute results directory. */
+function resolveFromRun(value) {
+    if (value === "latest")
+        return DEFAULT_RESULTS_DIR;
+    return resolve(value);
+}
 export function createReadinessReportCommand() {
-    return new Command("readiness-report")
+    return new Command("readiness")
         .description("Generate launch readiness report for a feature area")
         .requiredOption("-a, --area <area>", "Feature area to evaluate (required)")
+        .option("--from-run <path>", "Results directory to read from (`latest` or a path to a results directory containing score-summary.json)", "latest")
         .option("-H, --history", "Include historical progress from baselines", false)
         .option("-o, --output <path>", "Write markdown to file instead of stdout")
         .action(async (opts) => {
+        const resultsDir = resolveFromRun(opts.fromRun);
+        const scoreSummaryPath = join(resultsDir, "score-summary.json");
+        const gapAnalysisPath = join(resultsDir, "gap-analysis.json");
         // Load score summary
-        if (!existsSync(SCORE_SUMMARY_PATH)) {
-            console.error(`❌ Score summary not found at ${SCORE_SUMMARY_PATH}. Run \`pnpm pipeline\` first.`);
+        if (!existsSync(scoreSummaryPath)) {
+            console.error(`❌ Score summary not found at ${scoreSummaryPath}. Run \`ailf run\` first or pass --from-run <path>.`);
             process.exit(1);
         }
-        const scoreSummary = JSON.parse(readFileSync(SCORE_SUMMARY_PATH, "utf-8"));
+        const scoreSummary = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
         // Load threshold config
         let parsedThresholds;
         try {
@@ -57,8 +66,8 @@ export function createReadinessReportCommand() {
         const thresholdConfig = thresholdResult.data;
         // Load gap analysis (optional)
         let gapAnalysis;
-        if (existsSync(GAP_ANALYSIS_PATH)) {
-            gapAnalysis = JSON.parse(readFileSync(GAP_ANALYSIS_PATH, "utf-8"));
+        if (existsSync(gapAnalysisPath)) {
+            gapAnalysis = JSON.parse(readFileSync(gapAnalysisPath, "utf-8"));
         }
         const history = [];
         if (opts.history && existsSync(BASELINES_DIR)) {

package/dist/commands/remote-pipeline.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * remote-pipeline.ts — Remote execution flow for `ailf pipeline --remote`.
+ * remote-pipeline.ts — Remote execution flow for `ailf run --remote`.
  *
  * Reads local `.ailf/tasks/` YAML, validates locally via Zod schemas,
  * submits to the AILF API, polls for completion, and writes the same

package/dist/commands/remote-pipeline.js CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * remote-pipeline.ts — Remote execution flow for `ailf pipeline --remote`.
+ * remote-pipeline.ts — Remote execution flow for `ailf run --remote`.
  *
  * Reads local `.ailf/tasks/` YAML, validates locally via Zod schemas,
  * submits to the AILF API, polls for completion, and writes the same
@@ -135,8 +135,6 @@ function toConfigSlice(opts) {
         perspectiveOverride: opts.perspectiveOverride,
         graderReplications: opts.graderReplications,
         gapAnalysisEnabled: opts.gapAnalysisEnabled,
-        readinessEnabled: opts.readinessEnabled,
-        discoveryReportEnabled: opts.discoveryReportEnabled,
         noRemoteCache: opts.noRemoteCache,
         // D0037 / W0069 caller envelope overrides — flags override env vars
         // inside buildCallerEnvelope(), which also merges AILF_* defaults.

package/dist/commands/run.d.ts ADDED Viewed

@@ -0,0 +1,64 @@
+/**
+ * run command — the main evaluation entry point.
+ *
+ * Renamed from `ailf pipeline` to `ailf run` per W0077 Phase 1
+ * (see docs/design-docs/pipeline-command-surface.md §3). The command drives
+ * the evaluation pipeline; the orchestrator internals retain the "pipeline"
+ * name because they describe a multi-step process, not the CLI verb.
+ *
+ * Defines all 44+ CLI flags via Commander, resolves them into a typed
+ * options object, bridges to process.env for downstream modules, and
+ * delegates to runPipeline().
+ *
+ * @see docs/guides/cli-guide.md for per-flag behavior notes, or
+ * docs/references/cli-reference.md for the auto-generated flag matrix.
+ */
+import { Command } from "commander";
+/**
+ * Raw CLI options as parsed by Commander.
+ * Field names follow Commander's camelCase convention for kebab-case flags.
+ */
+export interface PipelineCliOptions {
+    area?: string;
+    autoScope: boolean;
+    /** `--before-source <name>` — swap the doc source to a "before" state for impact evaluation. */
+    beforeSource?: string;
+    cache: boolean;
+    changedDocs?: string;
+    /**
+     * `--compare [baseline]` — Commander optional argument.
+     *   undefined → flag not passed
+     *   true      → bare `--compare` (compare against latest baseline)
+     *   string    → `--compare path/to/baseline.json` (pin to a file)
+     */
+    compare?: boolean | string;
+    config?: string;
+    debug: boolean;
+    filterFirstN?: number;
+    filterPattern?: string;
+    filterSample?: number;
+    dryRun: boolean;
+    eval: boolean;
+    fetch: boolean;
+    mode: string;
+    variant?: string;
+    output?: string;
+    promptfooUrl?: string;
+    publish?: boolean;
+    publishTag?: string;
+    remoteCache?: boolean;
+    sanityDocument: string[];
+    sanityPerspective?: string;
+    search?: string;
+    source?: string;
+    remote: boolean;
+    task?: string;
+    tag: string[];
+    threshold?: number;
+    url: string[];
+    artifactsWrite: boolean;
+    classification?: string;
+    purpose?: string;
+    label: string[];
+}
+export declare function createRunCommand(): Command;

package/dist/commands/{pipeline.js → run.js} RENAMED Viewed

@@ -1,23 +1,29 @@
 /**
- * pipeline command — the main evaluation pipeline orchestrator.
+ * run command — the main evaluation entry point.
  *
- * Defines all 36+ CLI flags via Commander, resolves them into a typed
+ * Renamed from `ailf pipeline` to `ailf run` per W0077 Phase 1
+ * (see docs/design-docs/pipeline-command-surface.md §3). The command drives
+ * the evaluation pipeline; the orchestrator internals retain the "pipeline"
+ * name because they describe a multi-step process, not the CLI verb.
+ *
+ * Defines all 44+ CLI flags via Commander, resolves them into a typed
  * options object, bridges to process.env for downstream modules, and
  * delegates to runPipeline().
  *
- * @see docs/cli.md for the full flag reference.
+ * @see docs/guides/cli-guide.md for per-flag behavior notes, or
+ * docs/references/cli-reference.md for the auto-generated flag matrix.
  */
 import { Command } from "commander";
-import { addAgenticOptions, addDebugOptions, addSanitySourceOptions, } from "./shared/options.js";
-export function createPipelineCommand() {
-    const cmd = new Command("pipeline")
+import { addAgenticOptions, addDebugOptions, addSanityScopeOptions, } from "./shared/options.js";
+export function createRunCommand() {
+    const cmd = new Command("run")
         .description("Run the full evaluation pipeline")
         .option("-m, --mode <mode>", "Evaluation mode: literacy (default), mcp-server, agent-harness, knowledge-probe, custom. Legacy aliases (baseline, agentic, observed, full) are accepted and normalized to literacy + variant.", "literacy")
         .option("--variant <variant>", "Literacy variant: full (default — standard + agentic), baseline (standard only), agentic (agentic only), observed. Only applies to --mode literacy.")
         .option("-s, --source <name>", "Documentation source name (from sources.yaml)")
         .option("-n, --dry-run", "Validate configuration only, no execution", false)
-        .option("--skip-fetch", "Reuse cached documentation contexts", false)
-        .option("--skip-eval", "Recalculate from existing eval results", false)
+        .option("--no-fetch", "Reuse cached documentation contexts")
+        .option("--no-eval", "Recalculate from existing eval results")
         .option("--no-cache", "Bypass all pipeline-level caching")
         .option("--no-remote-cache", "Disable Content Lake cache lookup (local cache still active)")
         .option("--no-auto-scope", "Disable release-aware auto-scoping (evaluate all tasks even when a perspective is set)")
@@ -31,39 +37,22 @@ export function createPipelineCommand() {
             .filter(Boolean),
     ], [])
         .option("--changed-docs <slugs>", "Auto-scope to tasks affected by these document slugs")
-        .option("-j, --concurrency <n>", "Max parallel API calls during evaluation", parseInt)
-        .option("--grader-replications <n>", "Grader consistency replications", parseInt)
-        .option("--before <source>", "Before-state for impact evaluation")
-        .option("-c, --compare", "Compare scores against latest baseline", false)
-        .option("--compare-baseline <path>", "Specific baseline file to compare")
+        .option("--before-source <name>", "Swap the doc source to a `before` state and run a paired evaluation for impact analysis. Pairs with `ailf baseline` and `--compare`. Distinct from `--compare <baseline>`, which compares scores against a saved snapshot.")
+        .option("-c, --compare [baseline]", "Compare scores against the latest baseline. Pass a path to pin a specific baseline file (e.g. --compare results/baselines/2026-04-22.json).")
         .option("--threshold <n>", "Noise threshold for comparison (default: 2)", parseFloat)
-        .option("--no-gap-analysis", "Skip failure mode + impact analysis")
-        .option("--readiness", "Generate launch readiness checklist", false)
-        .option("--discovery-report", "Generate agent discoverability report", false)
         .option("-p, --publish", "Write report to Sanity + fan out to sinks (auto-enabled for full runs when report store is configured)")
         .option("--no-publish", "Suppress auto-publishing")
         .option("--publish-tag <tag>", "Label for published report")
-        .option("--report-dataset <name>", "Sanity dataset for report store")
-        .option("--report-project <id>", "Sanity project ID for report store")
         .option("--config <path>", "Load pipeline config from a TS/JS/YAML/JSON file (overrides most CLI flags)")
         .option("-o, --output <path>", "Write PR comment markdown to file")
-        .option("--output-dir <path>", "Base directory for pipeline output artifacts (default: inferred from execution context)")
         .option("--promptfoo-url <url>", "Promptfoo share URL for report")
-        .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge)", "content-lake")
-        .option("--repo-tasks-path <path>", "Path to repo-based task definitions. Defaults to ./.ailf/tasks/ when --task-source=repo.")
         .option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
-        .option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
-        .option("--no-artifacts", "Disable all artifact writers (D0033). Overrides --artifacts-dir.")
-        .option("--artifacts-dir <path>", "Root directory for local artifact output (D0033; default: .ailf/results/captures/)")
-        .option("--artifacts-dry-run", "Run artifact writers in dry-run mode — log intended writes, touch no storage", false)
-        .option("--artifacts-exclude <types>", "Comma-separated artifact types to skip (e.g. traces,graderPrompts)")
+        .option("--no-artifacts-write", "Run artifact writers in dry-run mode — log intended writes, touch no storage")
         // D0037 caller envelope (W0069) — threads through --remote so the
         // server-side pipeline attributes provenance to the caller, not the
         // API gateway runner. All env-var equivalents are honored too;
         // explicit flags win over env vars.
-        .option("--classification <value>", "Run classification for provenance: official | ad-hoc | experimental | test | external. Overrides AILF_CLASSIFICATION. See D0037.")
-        .option("--owner-team <slug>", "Team slug this run is attributable to. Overrides AILF_OWNER_TEAM.")
-        .option("--owner-individual <slug>", "Individual (GH actor / user ID) this run is attributable to. Overrides AILF_OWNER_INDIVIDUAL.")
+        .option("--classification <value>", "Run classification for provenance: official | adhoc | experimental | test | external. Overrides AILF_CLASSIFICATION. See D0037.")
         .option("--purpose <text>", 'Free-text "why I ran this" attached to provenance. Overrides AILF_PURPOSE.')
         .option("--label <value>", "Free-form searchable label (repeatable). Appends to any AILF_LABELS env value.", (val, prev) => [
         ...prev,
@@ -78,7 +67,7 @@ export function createPipelineCommand() {
     });
     // Add shared option groups
     addDebugOptions(cmd);
-    addSanitySourceOptions(cmd);
+    addSanityScopeOptions(cmd);
     addAgenticOptions(cmd);
     return cmd;
 }

package/dist/commands/shared/help.js CHANGED Viewed

@@ -67,14 +67,14 @@ function hasColorSupport() {
 // ---------------------------------------------------------------------------
 const afterHelpText = `
 Quick Start:
-  $ ailf pipeline --debug          Run a quick evaluation (first 2 tests)
-  $ ailf pipeline --area groq      Evaluate a specific feature area
-  $ ailf pipeline --explain        Preview the execution plan
+  $ ailf run --debug               Run a quick evaluation (first 2 tests)
+  $ ailf run --area groq           Evaluate a specific feature area
+  $ ailf run --explain             Preview the execution plan
   $ ailf init                      Set up AILF in a new project
 Documentation:
   Repository   https://github.com/sanity-io/ai-literacy-framework
-  CLI Guide    https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/cli.md
+  CLI Guide    https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/guides/cli-guide.md
   Getting Started  https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/getting-started.md
 Run ailf <command> --help for detailed usage of any command.`;

package/dist/commands/shared/options.d.ts CHANGED Viewed

@@ -7,11 +7,22 @@
  */
 import type { Command } from "commander";
 /**
- * Add agentic options: --url, --header, --allowed-origin, --search
+ * Add agentic options on `ailf run`: `--url` and `--search`. Both are
+ * per-run overrides.
+ *
+ * **`--header` and `--allowed-origin` retired in W0077 Phase 6f** —
+ * configure them in `.ailf/config.yaml`'s `agentic` block instead. The
+ * `DOC_HEADERS` and `DOC_ALLOWED_ORIGIN(S)` env vars still apply.
  */
 export declare function addAgenticOptions(cmd: Command): Command;
 /**
- * Add debug options: --debug, --debug-n, --debug-pattern, --debug-sample
+ * Add debug + filter options.
+ *
+ * `-d, --debug` is a shortcut that runs only the first 2 tests for fast
+ * feedback. `--filter-first-n`, `--filter-pattern`, and `--filter-sample`
+ * narrow the test set explicitly and mirror the same-named flags on
+ * `ailf eval` so the pipeline surface matches the underlying Promptfoo
+ * passthrough.
  */
 export declare function addDebugOptions(cmd: Command): Command;
 /**
@@ -27,9 +38,24 @@ export declare function addOutputOptions(cmd: Command): Command;
  */
 export declare function addOutputDirOption(cmd: Command): Command;
 /**
- * Add Sanity source options: --sanity-dataset, --sanity-project, etc.
+ * Add the full Sanity-source CLI surface — dataset, project, perspective,
+ * studio origin, document. Used by `ailf fetch-docs`, which is invoked
+ * directly with explicit Sanity targeting per run.
+ *
+ * **Not used by `ailf run`** post-W0077 Phase 6d. The dataset, project, and
+ * studio-origin trio moved to `.ailf/config.yaml`'s `source` block (with
+ * `SANITY_DATASET` / `SANITY_PROJECT_ID` / `SANITY_STUDIO_ORIGIN` env-var
+ * fallbacks). `ailf run` uses `addSanityScopeOptions` instead, which keeps
+ * only the per-run flags (`--sanity-perspective`, `--sanity-document`).
  */
 export declare function addSanitySourceOptions(cmd: Command): Command;
+/**
+ * Add the per-run Sanity-scope CLI surface — `--sanity-perspective` and
+ * `--sanity-document`. Used by `ailf run`. The per-environment trio
+ * (dataset, project, studio origin) lives in `.ailf/config.yaml`'s
+ * `source` block instead (W0077 Phase 6d).
+ */
+export declare function addSanityScopeOptions(cmd: Command): Command;
 /**
  * Collect repeatable string options into an array.
  * Used as a Commander argParser for options like --url, --header, --allowed-origin.

package/dist/commands/shared/options.js CHANGED Viewed

@@ -6,27 +6,33 @@
  * (e.g., debug options, Sanity source options, output options).
  */
 /**
- * Add agentic options: --url, --header, --allowed-origin, --search
+ * Add agentic options on `ailf run`: `--url` and `--search`. Both are
+ * per-run overrides.
+ *
+ * **`--header` and `--allowed-origin` retired in W0077 Phase 6f** —
+ * configure them in `.ailf/config.yaml`'s `agentic` block instead. The
+ * `DOC_HEADERS` and `DOC_ALLOWED_ORIGIN(S)` env vars still apply.
  */
 export function addAgenticOptions(cmd) {
     return cmd
         .option("--url <url>", "Documentation URL (repeatable)", collect, [])
-        .option("--urls <url>", "Alias for --url (repeatable)", collect, [])
-        .option("--header <header>", 'Custom HTTP header "Key: Value" (repeatable)', collect, [])
-        .option("--headers <header>", "Alias for --header (repeatable)", collect, [])
-        .option("--allowed-origin <origin>", "Agent origin sandbox (repeatable, supports globs)", collect, [])
-        .option("--allowed-origins <origin>", "Alias for --allowed-origin (repeatable)", collect, [])
         .option("-S, --search <mode>", "Web search mode: open, origin-only, off");
 }
 /**
- * Add debug options: --debug, --debug-n, --debug-pattern, --debug-sample
+ * Add debug + filter options.
+ *
+ * `-d, --debug` is a shortcut that runs only the first 2 tests for fast
+ * feedback. `--filter-first-n`, `--filter-pattern`, and `--filter-sample`
+ * narrow the test set explicitly and mirror the same-named flags on
+ * `ailf eval` so the pipeline surface matches the underlying Promptfoo
+ * passthrough.
  */
 export function addDebugOptions(cmd) {
     return cmd
         .option("-d, --debug", "Run subset of tests for fast feedback", false)
-        .option("--debug-n <n>", "First N tests", parseInt)
-        .option("--debug-pattern <regex>", "Filter tests by description regex")
-        .option("--debug-sample <n>", "Random sample of N tests", parseInt);
+        .option("--filter-first-n <n>", "Run only first N tests", parseInt)
+        .option("--filter-pattern <regex>", "Filter tests by description regex")
+        .option("--filter-sample <n>", "Random sample of N tests", parseInt);
 }
 /**
  * Add output options: --output, --format
@@ -47,7 +53,15 @@ export function addOutputDirOption(cmd) {
     return cmd.option("--output-dir <path>", "Base directory for output artifacts (default: .ailf/results/latest/)");
 }
 /**
- * Add Sanity source options: --sanity-dataset, --sanity-project, etc.
+ * Add the full Sanity-source CLI surface — dataset, project, perspective,
+ * studio origin, document. Used by `ailf fetch-docs`, which is invoked
+ * directly with explicit Sanity targeting per run.
+ *
+ * **Not used by `ailf run`** post-W0077 Phase 6d. The dataset, project, and
+ * studio-origin trio moved to `.ailf/config.yaml`'s `source` block (with
+ * `SANITY_DATASET` / `SANITY_PROJECT_ID` / `SANITY_STUDIO_ORIGIN` env-var
+ * fallbacks). `ailf run` uses `addSanityScopeOptions` instead, which keeps
+ * only the per-run flags (`--sanity-perspective`, `--sanity-document`).
  */
 export function addSanitySourceOptions(cmd) {
     return cmd
@@ -55,8 +69,18 @@ export function addSanitySourceOptions(cmd) {
         .option("--sanity-project <id>", "Override Sanity project ID")
         .option("--sanity-perspective <id>", "Sanity release perspective ID")
         .option("--sanity-studio-origin <url>", "Sanity Studio base URL")
-        .option("--sanity-document <id>", "Evaluate specific Sanity document(s) (repeatable)", collect, [])
-        .option("--sanity-documents <id>", "Alias for --sanity-document (repeatable)", collect, []);
+        .option("--sanity-document <id>", "Evaluate specific Sanity document(s) (repeatable)", collect, []);
+}
+/**
+ * Add the per-run Sanity-scope CLI surface — `--sanity-perspective` and
+ * `--sanity-document`. Used by `ailf run`. The per-environment trio
+ * (dataset, project, studio origin) lives in `.ailf/config.yaml`'s
+ * `source` block instead (W0077 Phase 6d).
+ */
+export function addSanityScopeOptions(cmd) {
+    return cmd
+        .option("--sanity-perspective <id>", "Sanity release perspective ID")
+        .option("--sanity-document <id>", "Evaluate specific Sanity document(s) (repeatable)", collect, []);
 }
 /**
  * Collect repeatable string options into an array.

package/dist/commands/validate-tasks.js CHANGED Viewed

@@ -21,7 +21,7 @@ import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "../adapters/tas
 import { validateCanonicalTasks, formatValidationResult, } from "../adapters/task-sources/repo-validation.js";
 import { discoverTsTaskFiles, loadTsTaskFile, } from "../adapters/task-sources/task-file-loader.js";
 export function createValidateTasksCommand() {
-    return new Command("validate-tasks")
+    return new Command("tasks")
         .description("Validate task files (YAML and TypeScript) in .ailf/tasks/ against the canonical schema")
         .argument("[path]", "Path to tasks directory (default: .ailf/tasks/)", ".ailf/tasks")
         .option("--strict", "Treat warnings as errors", false)

package/dist/commands/validate.d.ts CHANGED Viewed

@@ -6,4 +6,4 @@
  *
  */
 import { Command } from "commander";
-export declare function createValidateCommand(): Command;
+export declare function createValidateConfigCommand(): Command;

package/dist/commands/validate.js CHANGED Viewed

@@ -10,8 +10,8 @@ import { dirname, resolve } from "path";
 import { fileURLToPath } from "url";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
-export function createValidateCommand() {
-    return new Command("validate")
+export function createValidateConfigCommand() {
+    return new Command("config")
         .description("Validate pipeline configuration")
         .option("--strict", "Treat warnings as errors", false)
         .option("--contexts", "Check that context files exist", false)

package/dist/commands/weekly-digest.js CHANGED Viewed

@@ -8,11 +8,11 @@
  */
 import { Command } from "commander";
 export function createWeeklyDigestCommand() {
-    return new Command("weekly-digest")
+    return new Command("digest")
         .description("Generate and deliver a weekly evaluation digest")
         .option("-n, --dry-run", "Print to stdout only, do not send to Slack", false)
         .option("--lookback <days>", "Lookback window in days (default: from config or 7)", parseInt)
-        .option("--json", "Output raw JSON digest data", false)
+        .option("-f, --format <fmt>", "Output format: console or json", "console")
         .action(async (opts) => {
         const { generateDigest } = await import("../schedules/digest.js");
         const { getDigestConfig } = await import("../schedules/loader.js");
@@ -45,7 +45,7 @@ export function createWeeklyDigestCommand() {
         console.log(`  Regressed: ${digest.regressed.join(", ") || "none"}`);
         console.log(`  Stable: ${digest.stable.join(", ") || "none"}`);
         console.log();
-        if (opts.json) {
+        if (opts.format === "json") {
             console.log(JSON.stringify(digest, null, 2));
             return;
         }

package/dist/config/thresholds.ts CHANGED Viewed

@@ -2,9 +2,9 @@
  * thresholds.ts — Quality thresholds for readiness gates and regression alerts.
  *
  * Used by:
- * - `npx @sanity/ailf pipeline --readiness` (launch readiness checklist)
- * - `npx @sanity/ailf pipeline --publish` (severity-aware sink routing)
- * - `npx @sanity/ailf pipeline --compare` (regression alerting)
+ * - `npx @sanity/ailf report readiness` (launch readiness checklist)
+ * - `npx @sanity/ailf run --publish` (severity-aware sink routing)
+ * - `npx @sanity/ailf run --compare` (regression alerting)
  *
  * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
  */

package/dist/orchestration/build-app-context.js CHANGED Viewed

@@ -44,8 +44,6 @@ export function mapToResolvedConfig(opts, rootDir) {
         compareThreshold: opts.compareThreshold,
         compareBaseline: opts.compareBaseline,
         gapAnalysisEnabled: opts.gapAnalysisEnabled,
-        readinessEnabled: opts.readinessEnabled,
-        discoveryReportEnabled: opts.discoveryReportEnabled,
         publishEnabled: opts.publishEnabled,
         publishTag: opts.publishTag,
         noCache: opts.noCache,

package/dist/orchestration/build-step-sequence.js CHANGED Viewed

@@ -9,14 +9,12 @@ import { LiteracyVariant } from "../pipeline/normalize-mode.js";
 import { CallbackStep } from "./steps/callback-step.js";
 import { CalculateScoresStep } from "./steps/calculate-scores-step.js";
 import { CompareStep } from "./steps/compare-step.js";
-import { DiscoveryReportStep } from "./steps/discovery-report-step.js";
 import { FetchDocsStep } from "./steps/fetch-docs-step.js";
 import { FinalizeRunStep } from "./steps/finalize-run-step.js";
 import { GapAnalysisStep } from "./steps/gap-analysis-step.js";
 import { GenerateConfigsStep } from "./steps/generate-configs-step.js";
 import { GraderConsistencyStep } from "./steps/grader-consistency-step.js";
 import { PublishReportStep } from "./steps/publish-report-step.js";
-import { ReadinessStep } from "./steps/readiness-step.js";
 import { ReportStep } from "./steps/report-step.js";
 import { RunEvalStep } from "./steps/run-eval-step.js";
 import { MirrorRepoTasksStep } from "./steps/mirror-repo-tasks-step.js";
@@ -93,15 +91,7 @@ export function buildStepSequence(ctx, pipelineStart = Date.now()) {
     if (config.compareEnabled) {
         steps.push(new CompareStep());
     }
-    // Step 6b: Readiness report (optional)
-    if (config.readinessEnabled) {
-        steps.push(new ReadinessStep());
-    }
-    // Step 6c: Discovery report (optional)
-    if (config.discoveryReportEnabled) {
-        steps.push(new DiscoveryReportStep());
-    }
-    // Step 7: Callback delivery (optional, API-triggered evaluations)
+    // Step 6: Callback delivery (optional, API-triggered evaluations)
     if (config.callback?.url) {
         steps.push(new CallbackStep(config.callback, config.jobId));
     }

package/dist/orchestration/steps/fetch-docs-step.js CHANGED Viewed

@@ -27,7 +27,7 @@ export class FetchDocsStep {
     }
     async execute(ctx, state) {
         if (ctx.config.skipFetch) {
-            return { status: "skipped", reason: "--skip-fetch" };
+            return { status: "skipped", reason: "--no-fetch" };
         }
         const start = Date.now();
         // Load tasks — use the same source as GenerateConfigsStep to avoid

package/dist/orchestration/steps/index.d.ts CHANGED Viewed

@@ -6,14 +6,12 @@
  */
 export { CalculateScoresStep } from "./calculate-scores-step.js";
 export { CompareStep } from "./compare-step.js";
-export { DiscoveryReportStep } from "./discovery-report-step.js";
 export { FetchDocsStep } from "./fetch-docs-step.js";
 export { GapAnalysisStep } from "./gap-analysis-step.js";
 export { GenerateConfigsStep } from "./generate-configs-step.js";
 export { MirrorRepoTasksStep } from "./mirror-repo-tasks-step.js";
 export { GraderConsistencyStep } from "./grader-consistency-step.js";
 export { PublishReportStep } from "./publish-report-step.js";
-export { ReadinessStep } from "./readiness-step.js";
 export { ReportStep } from "./report-step.js";
 export { RunEvalStep } from "./run-eval-step.js";
 export { ValidateStep } from "./validate-step.js";

package/dist/orchestration/steps/index.js CHANGED Viewed

@@ -6,14 +6,12 @@
  */
 export { CalculateScoresStep } from "./calculate-scores-step.js";
 export { CompareStep } from "./compare-step.js";
-export { DiscoveryReportStep } from "./discovery-report-step.js";
 export { FetchDocsStep } from "./fetch-docs-step.js";
 export { GapAnalysisStep } from "./gap-analysis-step.js";
 export { GenerateConfigsStep } from "./generate-configs-step.js";
 export { MirrorRepoTasksStep } from "./mirror-repo-tasks-step.js";
 export { GraderConsistencyStep } from "./grader-consistency-step.js";
 export { PublishReportStep } from "./publish-report-step.js";
-export { ReadinessStep } from "./readiness-step.js";
 export { ReportStep } from "./report-step.js";
 export { RunEvalStep } from "./run-eval-step.js";
 export { ValidateStep } from "./validate-step.js";

package/dist/orchestration/steps/run-eval-step.js CHANGED Viewed

@@ -25,7 +25,7 @@ export class RunEvalStep {
     }
     async execute(ctx, state) {
         if (ctx.config.skipEval) {
-            return { status: "skipped", reason: "--skip-eval" };
+            return { status: "skipped", reason: "--no-eval" };
         }
         const start = Date.now();
         const { rootDir, debug, concurrency, noCache } = ctx.config;

package/dist/pipeline/cache.d.ts CHANGED Viewed

@@ -48,7 +48,7 @@ export interface CacheStats {
     hits: number;
     /** Steps where cache was missed (executed normally) */
     misses: number;
-    /** Steps that were skipped for other reasons (--skip-fetch, etc.) */
+    /** Steps that were skipped for other reasons (--no-fetch, etc.) */
     skipped: number;
     /** Per-step detail */
     steps: Record<string, "disabled" | "hit" | "miss" | "skipped">;

package/dist/pipeline/map-request-to-config.js CHANGED Viewed

@@ -47,8 +47,6 @@ export function mapRequestToConfig(request, rootDir) {
         compareThreshold: request.compareThreshold,
         compareBaseline: request.compareBaseline,
         gapAnalysisEnabled: request.gapAnalysis ?? true,
-        readinessEnabled: request.readiness ?? false,
-        discoveryReportEnabled: request.discoveryReport ?? false,
         publishEnabled: request.publish ?? publishDefault,
         publishTag: request.publishTag,
         noAutoScope: request.noAutoScope ?? false,

package/dist/pipeline/mirror-repo-tasks.d.ts CHANGED Viewed

@@ -107,7 +107,7 @@ export declare function buildMirrorDocument(task: LiteracyTaskDefinition, opts:
     slugToDocId: Map<string, string>;
 }): {
     baseline?: {
-        rubric?: "full" | "abbreviated" | "none" | undefined;
+        rubric?: "abbreviated" | "full" | "none" | undefined;
         enabled?: boolean | undefined;
     } | undefined;
     _id: string;