npm - @sanity/ailf - Versions diffs - 3.7.0 → 3.8.1 - Mend

@sanity/ailf 3.7.0 → 3.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (104) hide show

package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
package/config/thresholds.ts +3 -3
package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
package/dist/_vendor/ailf-core/examples/index.js +2 -2
package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
package/dist/_vendor/ailf-shared/run-classification.js +1 -1
package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
package/dist/adapters/api-client/build-request.d.ts +0 -2
package/dist/adapters/api-client/build-request.js +2 -6
package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
package/dist/adapters/config-sources/file-config-adapter.js +42 -17
package/dist/adapters/task-sources/repo-schemas.d.ts +41 -3
package/dist/adapters/task-sources/repo-schemas.js +127 -0
package/dist/cli-program.d.ts +39 -0
package/dist/cli-program.js +137 -0
package/dist/cli.d.ts +8 -2
package/dist/cli.js +128 -142
package/dist/commands/agent-report.js +1 -1
package/dist/commands/calculate-scores.js +0 -2
package/dist/commands/check-staleness.js +1 -1
package/dist/commands/chronic-failures.js +4 -4
package/dist/commands/coverage-audit.js +6 -7
package/dist/commands/discovery-report.js +16 -4
package/dist/commands/eval.d.ts +1 -1
package/dist/commands/eval.js +1 -1
package/dist/commands/explain-handler.d.ts +1 -1
package/dist/commands/explain-handler.js +13 -44
package/dist/commands/fetch-docs.js +0 -2
package/dist/commands/generate-configs.js +0 -2
package/dist/commands/grader/index.js +3 -3
package/dist/commands/init.d.ts +2 -2
package/dist/commands/init.js +10 -9
package/dist/commands/interactive.d.ts +1 -1
package/dist/commands/interactive.js +8 -8
package/dist/commands/pipeline-action.d.ts +1 -3
package/dist/commands/pipeline-action.js +174 -140
package/dist/commands/pr-comment.js +1 -3
package/dist/commands/publish.d.ts +1 -1
package/dist/commands/publish.js +2 -4
package/dist/commands/readiness-report.js +17 -8
package/dist/commands/remote-pipeline.d.ts +1 -1
package/dist/commands/remote-pipeline.js +1 -3
package/dist/commands/run.d.ts +64 -0
package/dist/commands/{pipeline.js → run.js} +19 -30
package/dist/commands/shared/help.js +4 -4
package/dist/commands/shared/options.d.ts +29 -3
package/dist/commands/shared/options.js +37 -13
package/dist/commands/validate-tasks.js +1 -1
package/dist/commands/validate.d.ts +1 -1
package/dist/commands/validate.js +2 -2
package/dist/commands/weekly-digest.js +3 -3
package/dist/config/thresholds.ts +3 -3
package/dist/orchestration/build-app-context.js +0 -2
package/dist/orchestration/build-step-sequence.js +1 -11
package/dist/orchestration/steps/fetch-docs-step.js +1 -1
package/dist/orchestration/steps/index.d.ts +0 -2
package/dist/orchestration/steps/index.js +0 -2
package/dist/orchestration/steps/run-eval-step.js +1 -1
package/dist/pipeline/cache.d.ts +1 -1
package/dist/pipeline/map-request-to-config.js +0 -2
package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
package/dist/pipeline/plan.d.ts +2 -4
package/dist/pipeline/plan.js +4 -32
package/dist/pipeline/run-context.d.ts +1 -1
package/dist/pipeline/run-context.js +4 -4
package/dist/pipeline/validate.d.ts +1 -1
package/dist/pipeline/validate.js +1 -1
package/package.json +11 -9
package/dist/commands/pipeline.d.ts +0 -77
package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
package/dist/orchestration/steps/discovery-report-step.js +0 -62
package/dist/orchestration/steps/readiness-step.d.ts +0 -13
package/dist/orchestration/steps/readiness-step.js +0 -98
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509

package/dist/adapters/config-sources/file-config-adapter.js CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * FileConfigAdapter — resolves pipeline config from a local config file.
  *
- * Enables `ailf pipeline --config <path>` to load all pipeline options
+ * Enables `ailf run --config <path>` to load all pipeline options
  * from a file instead of CLI flags. Supports multiple formats in
  * priority order:
  *
@@ -47,7 +47,7 @@ export class FileConfigAdapter {
             return this.validateAndMap(result.value, ext);
         }
         // YAML / JSON files — load via fs
-        const raw = readConfigFile(this.filePath);
+        const raw = await readConfigFile(this.filePath);
         return this.validateAndMap(raw, ext);
     }
     /**
@@ -69,13 +69,12 @@ export class FileConfigAdapter {
 // ---------------------------------------------------------------------------
 // Helpers
 // ---------------------------------------------------------------------------
-function readConfigFile(filePath) {
+async function readConfigFile(filePath) {
     const content = readFileSync(filePath, "utf-8");
     const ext = extname(filePath).toLowerCase();
     if (ext === ".yaml" || ext === ".yml") {
-        // Dynamic import for yaml parser — only needed when reading YAML configs
-        // eslint-disable-next-line @typescript-eslint/no-require-imports
-        const { parse } = require("yaml");
+        // Dynamic ESM import — only loaded when reading YAML configs.
+        const { parse } = await import("yaml");
         return parse(content);
     }
     return JSON.parse(content);
@@ -91,9 +90,16 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
     }
     // Normalize legacy mode names (e.g., "full" → literacy + variant)
     const normalized = normalizeMode(config.mode ?? "literacy");
+    // Output directory (W0077 Phase 6c) — resolve `output.dir` relative to the
+    // rootDir (the caller's workspace, set by the FileConfigAdapter caller).
+    // When unset, fall back to <rootDir>/.ailf/results/latest/ to mirror the
+    // CLI's default. This matches `resolveOutputDir` for the auto-load path.
+    const outputDir = config.output?.dir
+        ? resolve(rootDir, config.output.dir)
+        : resolve(rootDir, ".ailf", "results", "latest");
     return {
         rootDir,
-        outputDir: resolve(rootDir, "results", "latest"),
+        outputDir,
         mode: normalized.mode,
         variant: normalized.variant,
         noAutoScope: config.noAutoScope ?? false,
@@ -107,21 +113,40 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
         compareEnabled: config.compare ?? false,
         compareThreshold: config.compareThreshold,
         compareBaseline: config.compareBaseline,
-        gapAnalysisEnabled: config.gapAnalysis ?? true,
-        readinessEnabled: config.readiness ?? false,
-        discoveryReportEnabled: config.discoveryReport ?? false,
-        publishEnabled: config.publish ?? false,
-        publishTag: config.publishTag,
+        gapAnalysisEnabled: config.execution?.gapAnalysis ?? true,
+        // W0077 Phase 4 — `publish` is now a policy object. Map the auto value
+        // directly to a boolean for the file-config path; the runtime
+        // smart-default logic in pipeline-action.ts isn't relevant here because
+        // the user has explicitly handed us a config file.
+        publishEnabled: config.publish?.auto === "never"
+            ? false
+            : config.publish?.auto !== undefined,
+        publishTag: config.publish?.tag,
         noCache: config.noCache ?? false,
         noRemoteCache: config.noRemoteCache ?? false,
-        graderReplications: config.graderReplications,
+        graderReplications: config.execution?.graderReplications,
         urls: config.urls,
-        headers: config.headers,
-        allowedOrigins: config.allowedOrigins,
+        headers: config.agentic?.headers,
+        allowedOrigins: config.agentic?.allowedOrigins,
         searchMode: config.searchMode ?? "open",
-        concurrency: config.concurrency,
+        concurrency: config.execution?.concurrency,
         remote: false,
-        apiUrl: "https://ailf-api.sanity.build",
+        apiUrl: config.execution?.apiUrl ?? "https://ailf-api.sanity.build",
+        // W0077 Phase 6g — artifact writer settings. `enabled: false` flips
+        // `artifactsDisabled` so composition-root selects the NoOp writer.
+        artifactsDisabled: config.artifacts?.enabled === false ? true : undefined,
+        artifactsDir: config.artifacts?.dir
+            ? resolve(rootDir, config.artifacts.dir)
+            : undefined,
+        artifactsExclude: config.artifacts?.exclude,
+        // W0077 Phase 6h — task-source selection. Default is content-lake
+        // (signaled by `taskSourceType` undefined); when `repo`, the
+        // composition-root resolves `repoTasksPath` (defaulting to
+        // `<rootDir>/.ailf/tasks/` when unset).
+        taskSourceType: config.taskSource?.type === "repo" ? "repo" : undefined,
+        repoTasksPath: config.taskSource?.repoTasksPath
+            ? resolve(rootDir, config.taskSource.repoTasksPath)
+            : undefined,
         presets: config.presets,
     };
 }

package/dist/adapters/task-sources/repo-schemas.d.ts CHANGED Viewed

@@ -147,8 +147,8 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
     baseline: z.ZodOptional<z.ZodObject<{
         enabled: z.ZodOptional<z.ZodBoolean>;
         rubric: z.ZodOptional<z.ZodEnum<{
-            full: "full";
             abbreviated: "abbreviated";
+            full: "full";
             none: "none";
         }>>;
     }, z.core.$strip>>;
@@ -773,8 +773,8 @@ export declare const ContentLakeAuthorableTaskSchema: z.ZodObject<{
     baseline: z.ZodOptional<z.ZodObject<{
         enabled: z.ZodOptional<z.ZodBoolean>;
         rubric: z.ZodOptional<z.ZodEnum<{
-            full: "full";
             abbreviated: "abbreviated";
+            full: "full";
             none: "none";
         }>>;
     }, z.core.$strip>>;
@@ -893,8 +893,8 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
     baseline: z.ZodOptional<z.ZodObject<{
         enabled: z.ZodOptional<z.ZodBoolean>;
         rubric: z.ZodOptional<z.ZodEnum<{
-            full: "full";
             abbreviated: "abbreviated";
+            full: "full";
             none: "none";
         }>>;
     }, z.core.$strip>>;
@@ -1434,11 +1434,49 @@ export declare const RepoConfigSchema: z.ZodObject<{
         projectId: z.ZodOptional<z.ZodString>;
         dataset: z.ZodOptional<z.ZodString>;
         baseUrl: z.ZodOptional<z.ZodString>;
+        studioOrigin: z.ZodOptional<z.ZodString>;
     }, z.core.$strip>>;
     reportStore: z.ZodOptional<z.ZodObject<{
         projectId: z.ZodString;
         dataset: z.ZodString;
     }, z.core.$strip>>;
+    publish: z.ZodOptional<z.ZodObject<{
+        auto: z.ZodOptional<z.ZodEnum<{
+            never: "never";
+            always: "always";
+            "full-runs": "full-runs";
+        }>>;
+        tag: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
+    execution: z.ZodOptional<z.ZodObject<{
+        concurrency: z.ZodOptional<z.ZodNumber>;
+        graderReplications: z.ZodOptional<z.ZodNumber>;
+        gapAnalysis: z.ZodOptional<z.ZodBoolean>;
+        apiUrl: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
+    output: z.ZodOptional<z.ZodObject<{
+        dir: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
+    owner: z.ZodOptional<z.ZodObject<{
+        team: z.ZodOptional<z.ZodString>;
+        individual: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
+    agentic: z.ZodOptional<z.ZodObject<{
+        headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
+        allowedOrigins: z.ZodOptional<z.ZodArray<z.ZodString>>;
+    }, z.core.$strip>>;
+    artifacts: z.ZodOptional<z.ZodObject<{
+        enabled: z.ZodOptional<z.ZodBoolean>;
+        dir: z.ZodOptional<z.ZodString>;
+        exclude: z.ZodOptional<z.ZodArray<z.ZodString>>;
+    }, z.core.$strip>>;
+    taskSource: z.ZodOptional<z.ZodObject<{
+        type: z.ZodOptional<z.ZodEnum<{
+            "content-lake": "content-lake";
+            repo: "repo";
+        }>>;
+        repoTasksPath: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
     triggers: z.ZodOptional<z.ZodObject<{
         pr: z.ZodOptional<z.ZodObject<{
             mode: z.ZodDefault<z.ZodEnum<{

package/dist/adapters/task-sources/repo-schemas.js CHANGED Viewed

@@ -427,12 +427,17 @@ const ScheduleTriggerSchema = TriggerConfigSchema.extend({
 /**
  * Documentation source configuration.
  * Defines which Sanity project holds the documentation being evaluated.
+ *
+ * `studioOrigin` (W0077 Phase 6d) replaces the retired
+ * `--sanity-studio-origin` CLI flag on `ailf run`. The `SANITY_STUDIO_ORIGIN`
+ * env var still wins over this value at resolution time.
+ *
+ * Schema note: every field is optional, and so is the section itself.
+ * `projectId` and `dataset` must be non-empty strings when present;
+ * `baseUrl` and `studioOrigin` must pass zod's URL check when present.
  */
 const SourceConfigSchema = z
     .object({
     projectId: z.string().min(1).optional(),
     dataset: z.string().min(1).optional(),
     baseUrl: z.string().url().optional(),
+    studioOrigin: z.string().url().optional(),
 })
     .optional();
 /**
@@ -445,6 +450,121 @@ const ReportStoreConfigSchema = z
     dataset: z.string().min(1),
 })
     .optional();
+/**
+ * Publish policy. Controls when `ailf run` writes a report to the Content
+ * Lake without an explicit `--publish` / `--no-publish` flag.
+ *
+ * - `auto: "always"`    — publish any run with a configured report store
+ * - `auto: "full-runs"` — publish non-debug runs (default)
+ * - `auto: "never"`     — never auto-publish; users must pass --publish
+ *
+ * `tag` is a default value for `--publish-tag` when not passed at the CLI.
+ *
+ * Schema note: the "full-runs" default is not encoded here (`auto` has no
+ * `.default()`), so an omitted value stays `undefined` after parsing and the
+ * default has to be applied by whatever consumes the parsed config.
+ *
+ * @see docs/design-docs/pipeline-command-surface.md §5.3
+ */
+const PublishConfigSchema = z
+    .object({
+    auto: z.enum(["always", "full-runs", "never"]).optional(),
+    tag: z.string().optional(), // NOTE(review): no .min(1), unlike the sibling schemas — an empty tag passes validation; confirm intended
+})
+    .optional();
+/**
+ * Execution-tier configuration. Per-environment values that the four
+ * retired CLI flags used to set: concurrency, grader replications, gap
+ * analysis toggle, and the AILF API URL.
+ *
+ * Schema note: every field is optional, and so is the section itself.
+ * `concurrency` and `graderReplications` must be positive integers when
+ * present; `apiUrl` must pass zod's URL check. No defaults are encoded
+ * here, so omitted values stay `undefined` after parsing.
+ *
+ * @see docs/design-docs/pipeline-command-surface.md §5.7 (W0077 Phase 6b)
+ */
+const ExecutionConfigSchema = z
+    .object({
+    concurrency: z.number().int().positive().optional(),
+    graderReplications: z.number().int().positive().optional(),
+    gapAnalysis: z.boolean().optional(),
+    apiUrl: z.string().url().optional(),
+})
+    .optional();
+/**
+ * Task-source configuration (W0077 Phase 6h). Replaces the retired
+ * `--task-source` and `--repo-tasks-path` CLI flags on `ailf run`.
+ *
+ * - `type`           — `content-lake` (default) or `repo`. When `repo`,
+ *                      tasks load from `repoTasksPath` (or
+ *                      `<cwd>/.ailf/tasks/` if unset).
+ * - `repoTasksPath`  — optional explicit path. Resolved relative to the
+ *                      caller's cwd. Required to exist on disk.
+ *
+ * No env-var fallback today; cascade is config-file → built-in default.
+ *
+ * Schema note: only the shape is validated here. The `content-lake`
+ * default is not encoded (`type` has no `.default()`), and the on-disk
+ * existence requirement for `repoTasksPath` is not checked by this schema —
+ * it only requires a non-empty string.
+ */
+const TaskSourceConfigSchema = z
+    .object({
+    type: z.enum(["content-lake", "repo"]).optional(),
+    repoTasksPath: z.string().min(1).optional(),
+})
+    .optional();
+/**
+ * Artifact-writer configuration (W0077 Phase 6g). Replaces the retired
+ * `--no-artifacts`, `--artifacts-dir`, and `--artifacts-exclude` CLI flags
+ * on `ailf run`. The `AILF_ARTIFACTS_DIR` env var still wins over
+ * `artifacts.dir` at resolution time. `artifacts.enabled` defaults to
+ * `true` (writers attached); set `false` to disable all writers (mirrors
+ * the legacy `--no-artifacts` semantics).
+ *
+ * Other commands (`ailf runs export`, etc.) keep their `--artifacts-dir`
+ * flag — that's a "read from this directory" override, distinct from the
+ * pipeline's write-side `artifacts.dir`.
+ *
+ * Schema note: the `enabled` default of `true` is not encoded here (no
+ * `.default()`), so an omitted value parses to `undefined` and must be
+ * treated as "enabled" by the consumer. `dir` and each `exclude` entry
+ * must be non-empty strings when present.
+ */
+const ArtifactsConfigSchema = z
+    .object({
+    enabled: z.boolean().optional(),
+    dir: z.string().min(1).optional(),
+    exclude: z.array(z.string().min(1)).optional(),
+})
+    .optional();
+/**
+ * Owner attribution (W0077 Phase 6f). Replaces the retired `--owner-team`
+ * and `--owner-individual` CLI flags. Both feed the D0037 caller envelope
+ * that surfaces in remote-mode runs. Env vars `AILF_OWNER_TEAM` and
+ * `AILF_OWNER_INDIVIDUAL` still win over these values at resolution time.
+ *
+ * Schema note: both fields, and the section itself, are optional; when a
+ * field is present it must be a non-empty string.
+ */
+const OwnerConfigSchema = z
+    .object({
+    team: z.string().min(1).optional(),
+    individual: z.string().min(1).optional(),
+})
+    .optional();
+/**
+ * Agentic-mode configuration (W0077 Phase 6f). Replaces the retired
+ * `--header` and `--allowed-origin` CLI flags. `headers` is a key/value
+ * object (mirrors `DOC_HEADERS` env-var JSON shape); `allowedOrigins` is a
+ * list of origin globs. The `DOC_HEADERS` and `DOC_ALLOWED_ORIGIN(S)` env
+ * vars still apply downstream as additive merges.
+ *
+ * Schema note: `headers` must map string keys to string values, and each
+ * `allowedOrigins` entry is only checked for being a non-empty string —
+ * glob syntax is not validated by this schema.
+ */
+const AgenticConfigSchema = z
+    .object({
+    headers: z.record(z.string(), z.string()).optional(),
+    allowedOrigins: z.array(z.string().min(1)).optional(),
+})
+    .optional();
+/**
+ * Output-directory configuration. Replaces the retired `--output-dir`
+ * CLI flag on `ailf run`. Resolution order:
+ *
+ *   .ailf/config.yaml `output.dir` > built-in default
+ *
+ * Path is resolved relative to the caller's cwd. The built-in default is
+ * `<cwd>/.ailf/results/latest/` (see resolve-output-dir.ts). Other commands
+ * (`ailf publish`, `ailf pr-comment`, etc.) keep their `--output-dir`
+ * flag — that's a "read from this directory" override, distinct from the
+ * pipeline's write-side `output.dir`.
+ *
+ * Schema note: `dir` is only checked for being a non-empty string. Neither
+ * the path resolution nor the built-in default is applied by this schema;
+ * an omitted value stays `undefined` after parsing.
+ *
+ * @see docs/design-docs/pipeline-command-surface.md §5.7 (W0077 Phase 6c)
+ * @see docs/design-docs/output-dir-routing.md
+ */
+const OutputConfigSchema = z
+    .object({
+    dir: z.string().min(1).optional(),
+})
+    .optional();
 /**
  * Zod schema for .ailf/config.yaml — controls documentation source,
  * report destination, and trigger behavior for evaluations from an
@@ -453,6 +573,13 @@ const ReportStoreConfigSchema = z
 export const RepoConfigSchema = z.object({
     source: SourceConfigSchema,
     reportStore: ReportStoreConfigSchema,
+    publish: PublishConfigSchema,
+    execution: ExecutionConfigSchema,
+    output: OutputConfigSchema,
+    owner: OwnerConfigSchema,
+    agentic: AgenticConfigSchema,
+    artifacts: ArtifactsConfigSchema,
+    taskSource: TaskSourceConfigSchema,
     triggers: z
         .object({
         pr: TriggerConfigSchema.optional(),

package/dist/cli-program.d.ts ADDED Viewed

@@ -0,0 +1,39 @@
+/**
+ * cli-program.ts — pure factory for the AILF Commander program.
+ *
+ * Splits the program construction out of cli.ts so the CLI is testable
+ * in-process. cli.ts owns bootstrap side effects (dotenv loading,
+ * retired-flag/env/cmd checks, AILF_LOG_LEVEL pre-scan, parseAsync); this
+ * module owns command wiring.
+ *
+ * The W0078 M4 black-box harness imports `buildCliProgram()` directly so
+ * tests can construct a fresh program, attach `exitOverride()`, capture
+ * stdout/stderr, and parse a synthetic argv — all without spawning a
+ * subprocess.
+ *
+ * @see packages/eval/src/__tests__/cli-harness/run-cli.ts
+ */
+import { Command } from "commander";
+/**
+ * Options for `buildCliProgram`.
+ */
+export interface BuildCliProgramOptions {
+    /**
+     * Path to the eval package root (the directory containing package.json).
+     * Used to resolve the version string and as the root passed to the
+     * `--explain` handler.
+     */
+    evalRoot: string;
+}
+/**
+ * Construct the Commander program with every subcommand registered.
+ *
+ * Pure factory — no I/O beyond reading package.json for the version, no
+ * `process.exit()`, no `process.argv` access while the program is being
+ * built. Tests can call this and
+ * attach `program.exitOverride()` before parsing to capture exit codes
+ * instead of terminating the process.
+ *
+ * Caveat: the global `--explain` preAction hook registered by the
+ * implementation calls `process.exit(0)` directly at parse time. That is a
+ * direct Node exit rather than a Commander-initiated one, so
+ * `exitOverride()` does not capture it.
+ *
+ * Registration order determines group display order in `--help`. Commands
+ * within a group appear in the order they're added.
+ *
+ * @param opts - Factory options; `opts.evalRoot` is the eval package root.
+ * @returns The configured root `Command`, not yet parsed.
+ */
+export declare function buildCliProgram(opts: BuildCliProgramOptions): Command;

package/dist/cli-program.js ADDED Viewed

@@ -0,0 +1,137 @@
+/**
+ * cli-program.ts — pure factory for the AILF Commander program.
+ *
+ * Splits the program construction out of cli.ts so the CLI is testable
+ * in-process. cli.ts owns bootstrap side effects (dotenv loading,
+ * retired-flag/env/cmd checks, AILF_LOG_LEVEL pre-scan, parseAsync); this
+ * module owns command wiring.
+ *
+ * The W0078 M4 black-box harness imports `buildCliProgram()` directly so
+ * tests can construct a fresh program, attach `exitOverride()`, capture
+ * stdout/stderr, and parse a synthetic argv — all without spawning a
+ * subprocess.
+ *
+ * @see packages/eval/src/__tests__/cli-harness/run-cli.ts
+ */
+import { Command } from "commander";
+import { readFileSync } from "node:fs";
+import { resolve } from "node:path";
+import { CommandGroup, configureProgram } from "./commands/shared/help.js";
+import { createAgentReportCommand } from "./commands/agent-report.js";
+import { createBaselineCommand } from "./commands/baseline.js";
+import { createCacheCommand } from "./commands/cache.js";
+import { createCalculateScoresCommand } from "./commands/calculate-scores.js";
+import { createCheckStalenessCommand } from "./commands/check-staleness.js";
+import { createChronicFailuresCommand } from "./commands/chronic-failures.js";
+import { createCompareCommand } from "./commands/compare.js";
+import { createCompletionCommand } from "./commands/completion.js";
+import { createCoverageAuditCommand } from "./commands/coverage-audit.js";
+import { createDiscoveryReportCommand } from "./commands/discovery-report.js";
+import { createEvalCommand } from "./commands/eval.js";
+import { createFetchDocsCommand } from "./commands/fetch-docs.js";
+import { createGenerateConfigsCommand } from "./commands/generate-configs.js";
+import { createGraderCommand } from "./commands/grader/index.js";
+import { createInitCommand } from "./commands/init.js";
+import { createInteractiveCommand } from "./commands/interactive.js";
+import { createLookupDocCommand } from "./commands/lookup-doc.js";
+import { createMeasureRetrievalCommand } from "./commands/measure-retrieval.js";
+import { createPrCommentCommand } from "./commands/pr-comment.js";
+import { createPublishCommand } from "./commands/publish.js";
+import { createReadinessReportCommand } from "./commands/readiness-report.js";
+import { createRunCommand } from "./commands/run.js";
+import { createRunsCommand } from "./commands/runs.js";
+import { createValidateConfigCommand } from "./commands/validate.js";
+import { createValidateTasksCommand } from "./commands/validate-tasks.js";
+import { createWebhookServerCommand } from "./commands/webhook-server.js";
+import { createWeeklyDigestCommand } from "./commands/weekly-digest.js";
+/**
+ * Construct the Commander program with every subcommand registered.
+ *
+ * Pure factory — no I/O beyond reading package.json for the version, no
+ * `process.exit()`, no `process.argv` access while the program is being
+ * built. Tests can call this and
+ * attach `program.exitOverride()` before parsing to capture exit codes
+ * instead of terminating the process.
+ *
+ * Caveat: the global `--explain` preAction hook registered below calls
+ * `process.exit(0)` directly at parse time. That is a direct Node exit
+ * rather than a Commander-initiated one, so `exitOverride()` does not
+ * capture it.
+ *
+ * Registration order determines group display order in `--help`. Commands
+ * within a group appear in the order they're added.
+ *
+ * @param {{ evalRoot: string }} opts - `evalRoot` is the directory holding
+ *   package.json; it supplies the version string and is forwarded to the
+ *   `--explain` handler.
+ * @returns {Command} The configured root command, not yet parsed.
+ * @throws If `<evalRoot>/package.json` cannot be read or is not valid JSON.
+ */
+export function buildCliProgram(opts) {
+    const { evalRoot } = opts;
+    const pkgPath = resolve(evalRoot, "package.json");
+    const pkg = JSON.parse(readFileSync(pkgPath, "utf-8")); // synchronous read; only pkg.version is used below
+    const program = new Command()
+        .name("ailf")
+        .description("AI Literacy Framework — evaluate how well docs enable AI coding tools\n\nMeasure whether AI coding agents can find the right documentation\nand produce correct implementations of your product features.")
+        .version(pkg.version)
+        .option("-v, --verbose", "Increase log output")
+        .option("-q, --quiet", "Suppress non-error output")
+        .option("--dotenv <path>", "Override default .env file path")
+        .option("--explain", "Show execution plan without running")
+        .option("--format <fmt>", "Output format for --explain (console, json)", "console")
+        .option("-y, --yes", "With --explain: show plan then prompt to confirm execution");
+    configureProgram(program);
+    // Global --explain hook — intercepts any command before execution; a no-op unless --explain was passed
+    program.hook("preAction", async (thisCommand, actionCommand) => {
+        const globalOpts = thisCommand.opts();
+        if (!globalOpts.explain)
+            return;
+        const { handleExplain } = await import("./commands/explain-handler.js"); // loaded lazily, only on the --explain path
+        try {
+            await handleExplain(actionCommand, globalOpts.yes ?? false, evalRoot);
+            process.exit(0); // direct exit after showing the plan — not routed through program.exitOverride()
+        }
+        catch (err) {
+            // Sentinel from --yes confirmation: user wants to proceed
+            if (err !== null &&
+                typeof err === "object" &&
+                "__proceedArgv" in err) {
+                const filteredArgv = err.__proceedArgv; // presumably the original argv minus the explain flags — verify in explain-handler
+                console.log("\n  ▸ Proceeding with execution…\n");
+                await program.parseAsync(filteredArgv); // re-enters this same program instance with the sentinel's argv
+                return; // NOTE(review): returning normally lets Commander continue to the original action — confirm the command cannot run twice
+            }
+            throw err; // anything that is not the sentinel is a real failure
+        }
+    });
+    // ── Core Workflow ──────────────────────────────────────────────────────
+    program.addCommand(createRunCommand().helpGroup(CommandGroup.CoreWorkflow));
+    program.addCommand(createCompareCommand().helpGroup(CommandGroup.CoreWorkflow));
+    program.addCommand(createBaselineCommand().helpGroup(CommandGroup.CoreWorkflow));
+    program.addCommand(createPublishCommand().helpGroup(CommandGroup.CoreWorkflow));
+    program.addCommand(createRunsCommand().helpGroup(CommandGroup.CoreWorkflow));
+    // ── Analysis & Reports ────────────────────────────────────────────────
+    const reportCommand = new Command("report") // parent command: the report subcommands are nested under `ailf report`
+        .description("Generate analysis and reporting outputs from evaluation runs")
+        .addCommand(createReadinessReportCommand())
+        .addCommand(createChronicFailuresCommand())
+        .addCommand(createCoverageAuditCommand())
+        .addCommand(createDiscoveryReportCommand())
+        .addCommand(createAgentReportCommand())
+        .addCommand(createWeeklyDigestCommand())
+        .addCommand(createCheckStalenessCommand());
+    program.addCommand(reportCommand.helpGroup(CommandGroup.AnalysisReports));
+    // ── Grader Reliability ────────────────────────────────────────────────
+    program.addCommand(createGraderCommand().helpGroup(CommandGroup.GraderReliability));
+    // ── Setup & Configuration ─────────────────────────────────────────────
+    program.addCommand(createInitCommand().helpGroup(CommandGroup.SetupConfig));
+    const validateCommand = new Command("validate") // parent command: config and task validation are nested under `ailf validate`
+        .description("Validate AILF configuration and task files")
+        .addCommand(createValidateConfigCommand())
+        .addCommand(createValidateTasksCommand());
+    program.addCommand(validateCommand.helpGroup(CommandGroup.SetupConfig));
+    program.addCommand(createFetchDocsCommand().helpGroup(CommandGroup.SetupConfig));
+    program.addCommand(createCacheCommand().helpGroup(CommandGroup.SetupConfig));
+    // ── Pipeline Internals ────────────────────────────────────────────────
+    program.addCommand(createEvalCommand().helpGroup(CommandGroup.PipelineInternals));
+    program.addCommand(createCalculateScoresCommand().helpGroup(CommandGroup.PipelineInternals));
+    program.addCommand(createPrCommentCommand().helpGroup(CommandGroup.PipelineInternals));
+    program.addCommand(createGenerateConfigsCommand().helpGroup(CommandGroup.PipelineInternals));
+    program.addCommand(createMeasureRetrievalCommand().helpGroup(CommandGroup.PipelineInternals));
+    program.addCommand(createLookupDocCommand().helpGroup(CommandGroup.PipelineInternals));
+    program.addCommand(createWebhookServerCommand().helpGroup(CommandGroup.PipelineInternals));
+    // ── Developer Tools ───────────────────────────────────────────────────
+    program.addCommand(createInteractiveCommand().helpGroup(CommandGroup.DeveloperTools));
+    // Shell completion — must be registered last (needs full program tree)
+    program.addCommand(createCompletionCommand(program).helpGroup(CommandGroup.DeveloperTools));
+    return program;
+}

package/dist/cli.d.ts CHANGED Viewed

@@ -11,7 +11,7 @@
  * appends Quick Start examples.
  *
  * Usage:
- *   ailf pipeline [flags]       # full evaluation pipeline
+ *   ailf run [flags]            # full evaluation run
  *   ailf compare [flags]        # compare evaluation runs
  *   ailf baseline <cmd> [flags] # baseline management
  *   ailf validate [flags]       # config validation
@@ -24,6 +24,12 @@
  *   --dotenv <path>             # override default .env path
  *
  * Dev mode (without building):
- *   tsx src/cli.ts pipeline --debug
+ *   tsx src/cli.ts run --debug
+ *
+ * Module split: this file owns *bootstrap side effects* (dotenv,
+ * retired-flag/env/cmd checks, AILF_LOG_LEVEL pre-scan, parseAsync).
+ * The Commander wiring lives in ./cli-program.ts so the W0078 M4 black-box
+ * harness can build the program in-process without firing those side
+ * effects.
  */
 export {};