@sanity/ailf 3.7.0 → 3.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
  2. package/config/thresholds.ts +3 -3
  3. package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
  4. package/dist/_vendor/ailf-core/examples/index.js +2 -2
  5. package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
  6. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
  7. package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
  8. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
  9. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
  10. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
  11. package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
  12. package/dist/_vendor/ailf-shared/run-classification.js +1 -1
  13. package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
  14. package/dist/adapters/api-client/build-request.d.ts +0 -2
  15. package/dist/adapters/api-client/build-request.js +2 -6
  16. package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
  17. package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
  18. package/dist/adapters/config-sources/file-config-adapter.js +42 -17
  19. package/dist/adapters/task-sources/repo-schemas.d.ts +41 -3
  20. package/dist/adapters/task-sources/repo-schemas.js +127 -0
  21. package/dist/cli-program.d.ts +39 -0
  22. package/dist/cli-program.js +137 -0
  23. package/dist/cli.d.ts +8 -2
  24. package/dist/cli.js +128 -142
  25. package/dist/commands/agent-report.js +1 -1
  26. package/dist/commands/calculate-scores.js +0 -2
  27. package/dist/commands/check-staleness.js +1 -1
  28. package/dist/commands/chronic-failures.js +4 -4
  29. package/dist/commands/coverage-audit.js +6 -7
  30. package/dist/commands/discovery-report.js +16 -4
  31. package/dist/commands/eval.d.ts +1 -1
  32. package/dist/commands/eval.js +1 -1
  33. package/dist/commands/explain-handler.d.ts +1 -1
  34. package/dist/commands/explain-handler.js +13 -44
  35. package/dist/commands/fetch-docs.js +0 -2
  36. package/dist/commands/generate-configs.js +0 -2
  37. package/dist/commands/grader/index.js +3 -3
  38. package/dist/commands/init.d.ts +2 -2
  39. package/dist/commands/init.js +10 -9
  40. package/dist/commands/interactive.d.ts +1 -1
  41. package/dist/commands/interactive.js +8 -8
  42. package/dist/commands/pipeline-action.d.ts +1 -3
  43. package/dist/commands/pipeline-action.js +174 -140
  44. package/dist/commands/pr-comment.js +1 -3
  45. package/dist/commands/publish.d.ts +1 -1
  46. package/dist/commands/publish.js +2 -4
  47. package/dist/commands/readiness-report.js +17 -8
  48. package/dist/commands/remote-pipeline.d.ts +1 -1
  49. package/dist/commands/remote-pipeline.js +1 -3
  50. package/dist/commands/run.d.ts +64 -0
  51. package/dist/commands/{pipeline.js → run.js} +19 -30
  52. package/dist/commands/shared/help.js +4 -4
  53. package/dist/commands/shared/options.d.ts +29 -3
  54. package/dist/commands/shared/options.js +37 -13
  55. package/dist/commands/validate-tasks.js +1 -1
  56. package/dist/commands/validate.d.ts +1 -1
  57. package/dist/commands/validate.js +2 -2
  58. package/dist/commands/weekly-digest.js +3 -3
  59. package/dist/config/thresholds.ts +3 -3
  60. package/dist/orchestration/build-app-context.js +0 -2
  61. package/dist/orchestration/build-step-sequence.js +1 -11
  62. package/dist/orchestration/steps/fetch-docs-step.js +1 -1
  63. package/dist/orchestration/steps/index.d.ts +0 -2
  64. package/dist/orchestration/steps/index.js +0 -2
  65. package/dist/orchestration/steps/run-eval-step.js +1 -1
  66. package/dist/pipeline/cache.d.ts +1 -1
  67. package/dist/pipeline/map-request-to-config.js +0 -2
  68. package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
  69. package/dist/pipeline/plan.d.ts +2 -4
  70. package/dist/pipeline/plan.js +4 -32
  71. package/dist/pipeline/run-context.d.ts +1 -1
  72. package/dist/pipeline/run-context.js +4 -4
  73. package/dist/pipeline/validate.d.ts +1 -1
  74. package/dist/pipeline/validate.js +1 -1
  75. package/package.json +11 -9
  76. package/dist/commands/pipeline.d.ts +0 -77
  77. package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
  78. package/dist/orchestration/steps/discovery-report-step.js +0 -62
  79. package/dist/orchestration/steps/readiness-step.d.ts +0 -13
  80. package/dist/orchestration/steps/readiness-step.js +0 -98
  81. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
  82. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
  83. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
  84. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
  85. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
  86. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
  87. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
  88. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
  89. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
  90. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
  91. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
  92. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
  93. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
  94. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
  95. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
  96. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
  97. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
  98. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
  99. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
  100. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
  101. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
  102. package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
  103. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
  104. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509
package/dist/cli.js CHANGED
@@ -1,6 +1,4 @@
1
1
  #!/usr/bin/env node
2
- /* oxlint-disable import/first -- imports are intentionally interleaved with
3
- command registration for readability and lazy loading */
4
2
  /**
5
3
  * cli.ts — AILF CLI entry point.
6
4
  *
@@ -13,7 +11,7 @@
13
11
  * appends Quick Start examples.
14
12
  *
15
13
  * Usage:
16
- * ailf pipeline [flags] # full evaluation pipeline
14
+ * ailf run [flags] # full evaluation run
17
15
  * ailf compare [flags] # compare evaluation runs
18
16
  * ailf baseline <cmd> [flags] # baseline management
19
17
  * ailf validate [flags] # config validation
@@ -26,16 +24,21 @@
26
24
  * --dotenv <path> # override default .env path
27
25
  *
28
26
  * Dev mode (without building):
29
- * tsx src/cli.ts pipeline --debug
27
+ * tsx src/cli.ts run --debug
28
+ *
29
+ * Module split: this file owns *bootstrap side effects* (dotenv,
30
+ * retired-flag/env/cmd checks, AILF_LOG_LEVEL pre-scan, parseAsync).
31
+ * The Commander wiring lives in ./cli-program.ts so the W0078 M4 black-box
32
+ * harness can build the program in-process without firing those side
33
+ * effects.
30
34
  */
31
35
  import { config as dotenvConfig } from "dotenv";
32
- import { existsSync, readFileSync } from "fs";
36
+ import { existsSync } from "fs";
33
37
  import { dirname, resolve } from "path";
34
38
  import { fileURLToPath } from "url";
39
+ import { buildCliProgram } from "./cli-program.js";
35
40
  const __dirname = dirname(fileURLToPath(import.meta.url));
36
41
  const ROOT = resolve(__dirname, "..");
37
- /** Path to the eval package root (packages/eval). Used by --explain. */
38
- const EVAL_ROOT = ROOT;
39
42
  // ---------------------------------------------------------------------------
40
43
  // Load .env — must happen before Commander parses so that .env()
41
44
  // fallbacks resolve correctly.
@@ -76,22 +79,75 @@ else if (process.argv.includes("--quiet") || process.argv.includes("-q")) {
76
79
  process.env.AILF_LOG_LEVEL = "quiet";
77
80
  }
78
81
  // ---------------------------------------------------------------------------
79
- // W0052 — hard-error on retired capture flags and env vars.
80
- // --------------------------------------------------------------------------
81
- // The legacy collector has been removed. Callers still using
82
- // --capture / --capture-dir / --no-capture-compress / --no-capture-extras
83
- // or AILF_CAPTURE* / AILF_LEGACY_COLLECTOR / AILF_UNIFIED_ARTIFACTS must
84
- // migrate to --artifacts-dir / --no-artifacts / --artifacts-exclude. We
85
- // print a clear pointer so failures don't bubble up as opaque "unknown
86
- // option" errors from Commander.
82
+ // Hard-error on retired flags, env vars, and commands with a migration hint.
87
83
  // ---------------------------------------------------------------------------
88
- const RETIRED_FLAGS = [
89
- "--capture",
90
- "--capture-dir",
91
- "--no-capture-compress",
92
- "--no-capture-extras",
93
- "--capture-exclude",
94
- ];
84
+ // Each entry maps an old identifier to the message shown when it's seen, so
85
+ // failures don't bubble up as opaque "unknown option" errors from Commander.
86
+ // W0052 retired the legacy artifact collector; W0075 retired the --skip-*
87
+ // negation prefix, the --debug-{n,pattern,sample} filter flags, and several
88
+ // top-level report/validator commands that were consolidated into umbrellas.
89
+ // ---------------------------------------------------------------------------
90
+ const RETIRED_CAPTURE_HINT = " Use --artifacts-dir / --no-artifacts / --artifacts-exclude instead.\n" +
91
+ " See docs/guides/cli-guide.md and docs/decisions/D0033-unified-run-anchored-artifact-capture.md.";
92
+ const RETIRED_FLAG_HINTS = {
93
+ "--capture": RETIRED_CAPTURE_HINT,
94
+ "--capture-dir": RETIRED_CAPTURE_HINT,
95
+ "--no-capture-compress": RETIRED_CAPTURE_HINT,
96
+ "--no-capture-extras": RETIRED_CAPTURE_HINT,
97
+ "--capture-exclude": RETIRED_CAPTURE_HINT,
98
+ "--skip-fetch": " Use --no-fetch instead. See docs/design-docs/cli-naming-convention.md (W0075).",
99
+ "--skip-eval": " Use --no-eval instead. See docs/design-docs/cli-naming-convention.md (W0075).",
100
+ "--debug-n": " Use --filter-first-n instead. See docs/design-docs/cli-naming-convention.md (W0075).",
101
+ "--debug-pattern": " Use --filter-pattern instead. See docs/design-docs/cli-naming-convention.md (W0075).",
102
+ "--debug-sample": " Use --filter-sample instead. See docs/design-docs/cli-naming-convention.md (W0075).",
103
+ "--output-format": " Use --format instead. See docs/design-docs/cli-naming-convention.md (W0075).",
104
+ "--artifacts-dry-run": " Use --no-artifacts-write instead. See docs/design-docs/cli-naming-convention.md (W0075).",
105
+ "--readiness": " Use `ailf report readiness --from-run <path>` instead. See docs/design-docs/pipeline-command-surface.md (W0077).",
106
+ "--discovery-report": " Use `ailf report discovery --from-run <path>` instead. See docs/design-docs/pipeline-command-surface.md (W0077).",
107
+ "--compare-baseline": " Use `--compare <path>` instead. `--compare` now takes an optional baseline argument. See docs/design-docs/pipeline-command-surface.md (W0077).",
108
+ "--before": " Use --before-source instead. The flag was renamed to disambiguate from baseline comparison. See docs/design-docs/pipeline-command-surface.md (W0077).",
109
+ "--concurrency": " Set `execution.concurrency` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6b).",
110
+ "--grader-replications": " Set `execution.graderReplications` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6b).",
111
+ "--no-gap-analysis": " Set `execution.gapAnalysis: false` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6b).",
112
+ "--api-url": " Set `execution.apiUrl` in .ailf/config.yaml or use the AILF_API_URL env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6b).",
113
+ "--report-dataset": " Set `reportStore.dataset` in .ailf/config.yaml or use the AILF_REPORT_DATASET env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6e).",
114
+ "--report-project": " Set `reportStore.projectId` in .ailf/config.yaml or use the AILF_REPORT_PROJECT_ID env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6e).",
115
+ "--owner-team": " Set `owner.team` in .ailf/config.yaml or use the AILF_OWNER_TEAM env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6f).",
116
+ "--owner-individual": " Set `owner.individual` in .ailf/config.yaml or use the AILF_OWNER_INDIVIDUAL env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6f).",
117
+ "--header": " Set `agentic.headers` (key/value object) in .ailf/config.yaml or use the DOC_HEADERS env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6f).",
118
+ "--allowed-origin": " Set `agentic.allowedOrigins` (list of globs) in .ailf/config.yaml or use the DOC_ALLOWED_ORIGINS env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6f).",
119
+ "--task-source": " Set `taskSource.type` (content-lake | repo) in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6h).",
120
+ "--repo-tasks-path": " Set `taskSource.repoTasksPath` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6h).",
121
+ };
122
+ const RETIRED_COMMAND_HINTS = {
123
+ pipeline: " Use `ailf run` instead. See docs/design-docs/pipeline-command-surface.md (W0077).",
124
+ "validate-tasks": " Use `ailf validate tasks` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
125
+ "readiness-report": " Use `ailf report readiness` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
126
+ "chronic-failures": " Use `ailf report chronic-failures` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
127
+ "coverage-audit": " Use `ailf report coverage` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
128
+ "discovery-report": " Use `ailf report discovery` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
129
+ "agent-report": " Use `ailf report agent` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
130
+ "weekly-digest": " Use `ailf report digest` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
131
+ "check-staleness": " Use `ailf report staleness` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
132
+ };
133
+ /**
134
+ * Per-subcommand retired-flag hints. Use this for flags that were retired
135
+ * from one subcommand but still exist on others (e.g. `--output-dir` is
136
+ * retired from `ailf run` but still present on `ailf publish`,
137
+ * `ailf pr-comment`, etc.). Keys are subcommand names; values share the
138
+ * same shape as `RETIRED_FLAG_HINTS`.
139
+ */
140
+ const RETIRED_FLAG_HINTS_BY_COMMAND = {
141
+ run: {
142
+ "--output-dir": " Set `output.dir` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6c).",
143
+ "--sanity-dataset": " Set `source.dataset` in .ailf/config.yaml or use the SANITY_DATASET env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6d).",
144
+ "--sanity-project": " Set `source.projectId` in .ailf/config.yaml or use the SANITY_PROJECT_ID env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6d).",
145
+ "--sanity-studio-origin": " Set `source.studioOrigin` in .ailf/config.yaml or use the SANITY_STUDIO_ORIGIN env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6d).",
146
+ "--no-artifacts": " Set `artifacts.enabled: false` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6g).",
147
+ "--artifacts-dir": " Set `artifacts.dir` in .ailf/config.yaml or use the AILF_ARTIFACTS_DIR env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6g).",
148
+ "--artifacts-exclude": " Set `artifacts.exclude` (list of artifact-type names) in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6g).",
149
+ },
150
+ };
95
151
  const RETIRED_ENV_VARS = [
96
152
  "AILF_CAPTURE",
97
153
  "AILF_CAPTURE_DIR",
@@ -102,12 +158,42 @@ const RETIRED_ENV_VARS = [
102
158
  "AILF_LEGACY_COLLECTOR",
103
159
  "AILF_UNIFIED_ARTIFACTS",
104
160
  ];
161
+ /**
162
+ * Identify the subcommand the user invoked — the first non-flag arg after
163
+ * `ailf` (argv[0]=node, argv[1]=cli.ts). Returns undefined if none.
164
+ */
165
+ function findInvokedSubcommand() {
166
+ for (let i = 2; i < process.argv.length; i++) {
167
+ const arg = process.argv[i];
168
+ if (!arg.startsWith("-"))
169
+ return arg;
170
+ }
171
+ return undefined;
172
+ }
105
173
  function findRetiredFlag() {
174
+ const subcommand = findInvokedSubcommand();
175
+ const subcommandHints = subcommand
176
+ ? RETIRED_FLAG_HINTS_BY_COMMAND[subcommand]
177
+ : undefined;
106
178
  for (const arg of process.argv) {
107
179
  const bare = arg.split("=")[0];
108
- if (RETIRED_FLAGS.includes(bare)) {
109
- return bare;
180
+ if (subcommandHints && bare in subcommandHints) {
181
+ return { flag: bare, hint: subcommandHints[bare] };
110
182
  }
183
+ if (bare in RETIRED_FLAG_HINTS) {
184
+ return { flag: bare, hint: RETIRED_FLAG_HINTS[bare] };
185
+ }
186
+ }
187
+ return undefined;
188
+ }
189
+ function findRetiredCommand() {
190
+ // The first non-flag argument after `ailf` (argv[0]=node, argv[1]=cli.ts).
191
+ for (let i = 2; i < process.argv.length; i++) {
192
+ const arg = process.argv[i];
193
+ if (!arg.startsWith("-") && arg in RETIRED_COMMAND_HINTS)
194
+ return arg;
195
+ if (!arg.startsWith("-"))
196
+ return undefined;
111
197
  }
112
198
  return undefined;
113
199
  }
@@ -119,132 +205,32 @@ function findRetiredEnv() {
119
205
  return undefined;
120
206
  }
121
207
  const retiredFlag = findRetiredFlag();
208
+ const retiredCommand = findRetiredCommand();
122
209
  const retiredEnv = findRetiredEnv();
123
- if (retiredFlag || retiredEnv) {
124
- const source = retiredFlag
125
- ? `flag "${retiredFlag}"`
126
- : `environment variable "${retiredEnv}"`;
127
- console.error(`❌ ${source} was retired in W0052 along with the legacy artifact collector.`);
128
- console.error(" Use --artifacts-dir / --no-artifacts / --artifacts-exclude instead.");
129
- console.error(" See docs/cli.md and docs/decisions/D0033-unified-run-anchored-artifact-capture.md.");
210
+ if (retiredFlag || retiredCommand || retiredEnv) {
211
+ if (retiredFlag) {
212
+ console.error(`❌ flag "${retiredFlag.flag}" was retired.`);
213
+ console.error(retiredFlag.hint);
214
+ }
215
+ else if (retiredCommand) {
216
+ console.error(`❌ command "${retiredCommand}" was retired.`);
217
+ console.error(RETIRED_COMMAND_HINTS[retiredCommand]);
218
+ }
219
+ else if (retiredEnv) {
220
+ console.error(`❌ environment variable "${retiredEnv}" was retired in W0052 along with the legacy artifact collector.`);
221
+ console.error(RETIRED_CAPTURE_HINT);
222
+ }
130
223
  process.exit(2);
131
224
  }
132
225
  // ---------------------------------------------------------------------------
133
- // Build CLI program
226
+ // Build CLI program (delegates Commander wiring to ./cli-program.ts)
134
227
  // ---------------------------------------------------------------------------
135
- import { Command } from "commander";
136
- import { CommandGroup, configureProgram } from "./commands/shared/help.js";
137
- // Read version from package.json
138
- const pkgPath = resolve(ROOT, "package.json");
139
- const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
140
- const program = new Command()
141
- .name("ailf")
142
- .description("AI Literacy Framework — evaluate how well docs enable AI coding tools\n\nMeasure whether AI coding agents can find the right documentation\nand produce correct implementations of your product features.")
143
- .version(pkg.version)
144
- .option("-v, --verbose", "Increase log output")
145
- .option("-q, --quiet", "Suppress non-error output")
146
- .option("--dotenv <path>", "Override default .env file path")
147
- .option("--explain", "Show execution plan without running")
148
- .option("--format <fmt>", "Output format for --explain (console, json)", "console")
149
- .option("-y, --yes", "With --explain: show plan then prompt to confirm execution");
150
- configureProgram(program);
228
+ const program = buildCliProgram({ evalRoot: ROOT });
151
229
  // ---------------------------------------------------------------------------
152
- // Global --explain hook — intercepts any command before execution
153
- // ---------------------------------------------------------------------------
154
- program.hook("preAction", async (thisCommand, actionCommand) => {
155
- const globalOpts = thisCommand.opts();
156
- if (!globalOpts.explain)
157
- return;
158
- const { handleExplain } = await import("./commands/explain-handler.js");
159
- try {
160
- await handleExplain(actionCommand, globalOpts.yes ?? false, EVAL_ROOT);
161
- process.exit(0);
162
- }
163
- catch (err) {
164
- // Sentinel from --yes confirmation: user wants to proceed
165
- if (err !== null &&
166
- typeof err === "object" &&
167
- "__proceedArgv" in err) {
168
- const filteredArgv = err.__proceedArgv;
169
- console.log("\n ▸ Proceeding with execution…\n");
170
- await program.parseAsync(filteredArgv);
171
- return;
172
- }
173
- throw err;
174
- }
175
- });
176
- // ---------------------------------------------------------------------------
177
- // Register commands
230
+ // Parse and run — default to showing help when no arguments given.
178
231
  //
179
- // Registration order determines group display order in --help.
180
- // Within each group, commands appear in the order they are added.
181
- // ---------------------------------------------------------------------------
182
- // ── Core Workflow ──────────────────────────────────────────────────────
183
- import { createPipelineCommand } from "./commands/pipeline.js";
184
- program.addCommand(createPipelineCommand().helpGroup(CommandGroup.CoreWorkflow));
185
- import { createCompareCommand } from "./commands/compare.js";
186
- program.addCommand(createCompareCommand().helpGroup(CommandGroup.CoreWorkflow));
187
- import { createBaselineCommand } from "./commands/baseline.js";
188
- program.addCommand(createBaselineCommand().helpGroup(CommandGroup.CoreWorkflow));
189
- import { createPublishCommand } from "./commands/publish.js";
190
- program.addCommand(createPublishCommand().helpGroup(CommandGroup.CoreWorkflow));
191
- import { createRunsCommand } from "./commands/runs.js";
192
- program.addCommand(createRunsCommand().helpGroup(CommandGroup.CoreWorkflow));
193
- // ── Analysis & Reports ────────────────────────────────────────────────
194
- import { createReadinessReportCommand } from "./commands/readiness-report.js";
195
- program.addCommand(createReadinessReportCommand().helpGroup(CommandGroup.AnalysisReports));
196
- import { createChronicFailuresCommand } from "./commands/chronic-failures.js";
197
- program.addCommand(createChronicFailuresCommand().helpGroup(CommandGroup.AnalysisReports));
198
- import { createCoverageAuditCommand } from "./commands/coverage-audit.js";
199
- program.addCommand(createCoverageAuditCommand().helpGroup(CommandGroup.AnalysisReports));
200
- import { createDiscoveryReportCommand } from "./commands/discovery-report.js";
201
- program.addCommand(createDiscoveryReportCommand().helpGroup(CommandGroup.AnalysisReports));
202
- import { createAgentReportCommand } from "./commands/agent-report.js";
203
- program.addCommand(createAgentReportCommand().helpGroup(CommandGroup.AnalysisReports));
204
- import { createWeeklyDigestCommand } from "./commands/weekly-digest.js";
205
- program.addCommand(createWeeklyDigestCommand().helpGroup(CommandGroup.AnalysisReports));
206
- import { createCheckStalenessCommand } from "./commands/check-staleness.js";
207
- program.addCommand(createCheckStalenessCommand().helpGroup(CommandGroup.AnalysisReports));
208
- // ── Grader Reliability ────────────────────────────────────────────────
209
- import { createGraderCommand } from "./commands/grader/index.js";
210
- program.addCommand(createGraderCommand().helpGroup(CommandGroup.GraderReliability));
211
- // ── Setup & Configuration ─────────────────────────────────────────────
212
- import { createInitCommand } from "./commands/init.js";
213
- program.addCommand(createInitCommand().helpGroup(CommandGroup.SetupConfig));
214
- import { createValidateCommand } from "./commands/validate.js";
215
- program.addCommand(createValidateCommand().helpGroup(CommandGroup.SetupConfig));
216
- import { createValidateTasksCommand } from "./commands/validate-tasks.js";
217
- program.addCommand(createValidateTasksCommand().helpGroup(CommandGroup.SetupConfig));
218
- import { createFetchDocsCommand } from "./commands/fetch-docs.js";
219
- program.addCommand(createFetchDocsCommand().helpGroup(CommandGroup.SetupConfig));
220
- import { createCacheCommand } from "./commands/cache.js";
221
- program.addCommand(createCacheCommand().helpGroup(CommandGroup.SetupConfig));
222
- // ── Pipeline Internals ────────────────────────────────────────────────
223
- import { createEvalCommand } from "./commands/eval.js";
224
- program.addCommand(createEvalCommand().helpGroup(CommandGroup.PipelineInternals));
225
- import { createCalculateScoresCommand } from "./commands/calculate-scores.js";
226
- program.addCommand(createCalculateScoresCommand().helpGroup(CommandGroup.PipelineInternals));
227
- import { createPrCommentCommand } from "./commands/pr-comment.js";
228
- program.addCommand(createPrCommentCommand().helpGroup(CommandGroup.PipelineInternals));
229
- import { createGenerateConfigsCommand } from "./commands/generate-configs.js";
230
- program.addCommand(createGenerateConfigsCommand().helpGroup(CommandGroup.PipelineInternals));
231
- import { createMeasureRetrievalCommand } from "./commands/measure-retrieval.js";
232
- program.addCommand(createMeasureRetrievalCommand().helpGroup(CommandGroup.PipelineInternals));
233
- import { createLookupDocCommand } from "./commands/lookup-doc.js";
234
- program.addCommand(createLookupDocCommand().helpGroup(CommandGroup.PipelineInternals));
235
- import { createWebhookServerCommand } from "./commands/webhook-server.js";
236
- program.addCommand(createWebhookServerCommand().helpGroup(CommandGroup.PipelineInternals));
237
- // ── Developer Tools ───────────────────────────────────────────────────
238
- import { createInteractiveCommand } from "./commands/interactive.js";
239
- program.addCommand(createInteractiveCommand().helpGroup(CommandGroup.DeveloperTools));
240
- // Shell completion — must be registered last (needs full program tree)
241
- import { createCompletionCommand } from "./commands/completion.js";
242
- program.addCommand(createCompletionCommand(program).helpGroup(CommandGroup.DeveloperTools));
243
- // ---------------------------------------------------------------------------
244
- // Parse and run — default to showing help when no arguments given
245
- // ---------------------------------------------------------------------------
246
- // If no command is specified (just `ailf`), show help.
247
232
  // The interactive wizard is still available via `ailf interactive`.
233
+ // ---------------------------------------------------------------------------
248
234
  if (process.argv.length <= 2) {
249
235
  program.outputHelp();
250
236
  }
@@ -6,7 +6,7 @@ import { dirname, join } from "path";
6
6
  import { Command } from "commander";
7
7
  import { analyzeResults } from "../pipeline/agent-behavior-report.js";
8
8
  export function createAgentReportCommand() {
9
- return new Command("agent-report")
9
+ return new Command("agent")
10
10
  .description("Generate an agent behavior observation report from eval results")
11
11
  .argument("[results-path]", "Path to eval-results.json (default: results/latest/eval-results.json)")
12
12
  .action(async (resultsPath) => {
@@ -30,8 +30,6 @@ export function createCalculateScoresCommand() {
30
30
  skipEval: true,
31
31
  compareEnabled: false,
32
32
  gapAnalysisEnabled: false,
33
- readinessEnabled: false,
34
- discoveryReportEnabled: false,
35
33
  publishEnabled: false,
36
34
  noCache: true,
37
35
  noRemoteCache: true,
@@ -12,7 +12,7 @@
12
12
  */
13
13
  import { Command } from "commander";
14
14
  export function createCheckStalenessCommand() {
15
- return new Command("check-staleness")
15
+ return new Command("staleness")
16
16
  .description("Exit 1 if no evaluation report has been produced within the max-age window")
17
17
  .option("--max-age <days>", "Max age in days before reports are considered stale", (v) => Number.parseInt(v, 10), 3)
18
18
  .action(async (opts) => {
@@ -11,15 +11,15 @@ export function createChronicFailuresCommand() {
11
11
  return new Command("chronic-failures")
12
12
  .description("Identify tasks that error in >50% of recent evaluation runs")
13
13
  .option("--lookback <n>", "Number of recent reports to analyze", (v) => parseInt(v, 10), 10)
14
- .option("--threshold <n>", "Error rate threshold (0-1) for chronic classification", (v) => parseFloat(v), 0.5)
15
- .option("--json", "Output raw JSON", false)
14
+ .option("--error-rate <n>", "Error rate threshold (0-1) for chronic classification", (v) => parseFloat(v), 0.5)
15
+ .option("-f, --format <fmt>", "Output format: console or json", "console")
16
16
  .action(async (opts) => {
17
17
  const reportStore = new ReportStore();
18
18
  const report = await detectChronicFailures(reportStore, {
19
19
  lookback: opts.lookback,
20
- threshold: opts.threshold,
20
+ threshold: opts.errorRate,
21
21
  });
22
- if (opts.json) {
22
+ if (opts.format === "json") {
23
23
  console.log(JSON.stringify(report, null, 2));
24
24
  }
25
25
  else {
@@ -13,10 +13,9 @@ import { createSanityLiteracyPreset } from "../pipeline/compiler/presets/index.j
13
13
  const __dirname = dirname(fileURLToPath(import.meta.url));
14
14
  const ROOT = resolve(__dirname, "..", "..");
15
15
  export function createCoverageAuditCommand() {
16
- return new Command("coverage-audit")
16
+ return new Command("coverage")
17
17
  .description("Run documentation coverage audit against feature registry")
18
- .option("--format <fmt>", "Output format: table, md, markdown")
19
- .option("--json", "Output raw JSON", false)
18
+ .option("-f, --format <fmt>", "Output format: table, md, markdown, json", "table")
20
19
  .action(async (opts) => {
21
20
  // Build a registry with mode base + preset so coverage audit works
22
21
  // even when config/features.ts is empty (preset is source of truth).
@@ -28,17 +27,17 @@ export function createCoverageAuditCommand() {
28
27
  console.error("❌ Coverage audit failed. Ensure config/features.yaml exists and is valid.");
29
28
  process.exit(1);
30
29
  }
31
- if (opts.json) {
30
+ const isMarkdown = opts.format === "md" || opts.format === "markdown";
31
+ if (opts.format === "json") {
32
32
  console.log(JSON.stringify(report, null, 2));
33
33
  }
34
- else if (opts.format === "md" || opts.format === "markdown") {
34
+ else if (isMarkdown) {
35
35
  console.log(formatCoverageMarkdown(report));
36
36
  }
37
37
  else {
38
38
  console.log(formatCoverageConsole(report));
39
39
  }
40
- // Print document utilization stats for non-JSON console output
41
- if (!opts.json && opts.format !== "md" && opts.format !== "markdown") {
40
+ if (opts.format !== "json" && !isMarkdown) {
42
41
  const docStats = countReferencedDocs(ROOT);
43
42
  console.log("DOCUMENT UTILIZATION:");
44
43
  console.log(` ${docStats.total} unique document slugs referenced across evaluation tasks`);
@@ -13,17 +13,29 @@ import { fileURLToPath } from "url";
13
13
  import { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../pipeline/discovery-report.js";
14
14
  const __dirname = dirname(fileURLToPath(import.meta.url));
15
15
  const ROOT = resolve(__dirname, "..", "..");
16
+ const DEFAULT_RESULTS_DIR = join(ROOT, "results", "latest");
17
+ /** Resolve `--from-run` to an absolute path to score-summary.json. */
18
+ function resolveFromRun(value) {
19
+ if (value === "latest")
20
+ return join(DEFAULT_RESULTS_DIR, "score-summary.json");
21
+ // If the value points at a directory, look for score-summary.json inside it.
22
+ // Otherwise treat it as a direct file path.
23
+ const resolved = resolve(value);
24
+ return resolved.endsWith(".json")
25
+ ? resolved
26
+ : join(resolved, "score-summary.json");
27
+ }
16
28
  export function createDiscoveryReportCommand() {
17
- return new Command("discovery-report")
29
+ return new Command("discovery")
18
30
  .description("Generate agent discoverability report from retrieval metrics")
19
31
  .option("-a, --area <areas>", "Filter by feature areas (comma-separated)")
20
32
  .option("-o, --output <path>", "Write markdown to file instead of stdout")
21
- .option("-i, --input <path>", "Path to score-summary.json", join(ROOT, "results", "latest", "score-summary.json"))
33
+ .option("--from-run <path>", "Results to read from (`latest`, a results directory, or a direct path to score-summary.json)", "latest")
22
34
  .action(async (opts) => {
23
- const summaryPath = opts.input;
35
+ const summaryPath = resolveFromRun(opts.fromRun);
24
36
  if (!existsSync(summaryPath)) {
25
37
  console.error(`❌ Score summary not found: ${summaryPath}`);
26
- console.error("Run an agentic evaluation first: pnpm pipeline -- --mode agentic");
38
+ console.error("Run an agentic evaluation first: ailf run --mode agentic");
27
39
  process.exit(1);
28
40
  }
29
41
  const summary = JSON.parse(readFileSync(summaryPath, "utf-8"));
@@ -2,7 +2,7 @@
2
2
  * eval command — run promptfoo evaluation directly (passthrough).
3
3
  *
4
4
  * This is a convenience command that forwards to `promptfoo eval`.
5
- * For most use cases, prefer `ailf pipeline` which handles the full
5
+ * For most use cases, prefer `ailf run` which handles the full
6
6
  * lifecycle (fetch → generate → eval → score → report).
7
7
  */
8
8
  import { Command } from "commander";
@@ -2,7 +2,7 @@
2
2
  * eval command — run promptfoo evaluation directly (passthrough).
3
3
  *
4
4
  * This is a convenience command that forwards to `promptfoo eval`.
5
- * For most use cases, prefer `ailf pipeline` which handles the full
5
+ * For most use cases, prefer `ailf run` which handles the full
6
6
  * lifecycle (fetch → generate → eval → score → report).
7
7
  */
8
8
  import { Command } from "commander";
@@ -12,7 +12,7 @@
12
12
  * Every command is registered in `EXPLAIN_REGISTRY` with either:
13
13
  * - **Static metadata** — description, filesRead, filesCreated, steps
14
14
  * - **A builder function** — for commands that need to inspect CLI options
15
- * or perform async work (e.g., pipeline, init)
15
+ * or perform async work (e.g., run, init)
16
16
  *
17
17
  * Adding --explain support for a new command = adding one registry entry.
18
18
  * Commands not in the registry fall back to a minimal generic plan.
@@ -12,7 +12,7 @@
12
12
  * Every command is registered in `EXPLAIN_REGISTRY` with either:
13
13
  * - **Static metadata** — description, filesRead, filesCreated, steps
14
14
  * - **A builder function** — for commands that need to inspect CLI options
15
- * or perform async work (e.g., pipeline, init)
15
+ * or perform async work (e.g., run, init)
16
16
  *
17
17
  * Adding --explain support for a new command = adding one registry entry.
18
18
  * Commands not in the registry fall back to a minimal generic plan.
@@ -39,7 +39,7 @@ const EXPLAIN_REGISTRY = {
39
39
  // ── Dynamic builders (inspect CLI options) ────────────────────────
40
40
  baseline: buildBaselineExplainPlan,
41
41
  init: buildInitExplainPlan,
42
- pipeline: buildPipelineExplainPlan,
42
+ run: buildPipelineExplainPlan,
43
43
  // ── Static metadata ───────────────────────────────────────────────
44
44
  "agent-report": {
45
45
  description: "Generate an agent behavior observation report from eval results",
@@ -588,15 +588,11 @@ export async function handleExplain(actionCommand, confirmExecution, rootDir) {
588
588
  * Build a plan for the `init` command.
589
589
  *
590
590
  * Shows which files and directories will be created, taking into
591
- * account the --output-format and --path flags.
591
+ * account the --format and --path flags.
592
592
  */
593
593
  function buildInitExplainPlan(actionCommand, rootDir) {
594
594
  const opts = actionCommand.opts();
595
- const format = opts.outputFormat === "json"
596
- ? "json"
597
- : opts.outputFormat === "yaml"
598
- ? "yaml"
599
- : "ts";
595
+ const format = opts.format === "json" ? "json" : opts.format === "yaml" ? "yaml" : "ts";
600
596
  const taskExt = format === "ts" ? ".task.ts" : format === "yaml" ? ".yaml" : ".json";
601
597
  const configFile = format === "ts"
602
598
  ? "ailf.config.ts"
@@ -667,69 +663,44 @@ function buildBaselineExplainPlan(actionCommand, rootDir) {
667
663
  });
668
664
  }
669
665
  /**
670
- * Build a plan for the `pipeline` command — the richest plan with steps,
666
+ * Build a plan for the `run` command — the richest plan with steps,
671
667
  * tasks, models, cost estimates, and cache predictions.
672
668
  */
673
669
  async function buildPipelineExplainPlan(actionCommand, rootDir) {
674
670
  const raw = actionCommand.opts();
675
671
  // Merge Commander-parsed opts with safe defaults for array/boolean fields
676
672
  const withDefaults = {
677
- allowedOrigin: raw.allowedOrigin ?? [],
678
- allowedOrigins: raw.allowedOrigins ?? [],
679
673
  area: raw.area,
680
674
  autoScope: raw.autoScope ?? true,
681
- before: raw.before,
675
+ beforeSource: raw.beforeSource,
682
676
  cache: raw.cache ?? true,
683
677
  changedDocs: raw.changedDocs,
684
- compare: raw.compare ?? false,
685
- compareBaseline: raw.compareBaseline,
686
- concurrency: raw.concurrency,
678
+ compare: raw.compare,
687
679
  debug: raw.debug ?? false,
688
- debugN: raw.debugN,
689
- debugPattern: raw.debugPattern,
690
- debugSample: raw.debugSample,
691
- discoveryReport: raw.discoveryReport ?? false,
680
+ filterFirstN: raw.filterFirstN,
681
+ filterPattern: raw.filterPattern,
682
+ filterSample: raw.filterSample,
692
683
  dryRun: raw.dryRun ?? false,
693
- gapAnalysis: raw.gapAnalysis ?? true,
694
- graderReplications: raw.graderReplications,
695
- header: raw.header ?? [],
696
- headers: raw.headers ?? [],
697
684
  mode: raw.mode ?? LiteracyVariant.FULL,
698
685
  output: raw.output,
699
686
  promptfooUrl: raw.promptfooUrl,
700
687
  publish: raw.publish,
701
688
  publishTag: raw.publishTag,
702
- readiness: raw.readiness ?? false,
703
- reportDataset: raw.reportDataset,
704
- reportProject: raw.reportProject,
705
- sanityDataset: raw.sanityDataset,
706
689
  sanityDocument: raw.sanityDocument ?? [],
707
- sanityDocuments: raw.sanityDocuments ?? [],
708
690
  sanityPerspective: raw.sanityPerspective,
709
- sanityProject: raw.sanityProject,
710
- sanityStudioOrigin: raw.sanityStudioOrigin,
711
691
  search: raw.search,
712
- skipEval: raw.skipEval ?? false,
713
- skipFetch: raw.skipFetch ?? false,
692
+ eval: raw.eval ?? true,
693
+ fetch: raw.fetch ?? true,
714
694
  source: raw.source,
715
695
  tag: raw.tag ?? [],
716
696
  task: raw.task,
717
697
  threshold: raw.threshold,
718
698
  url: raw.url ?? [],
719
- urls: raw.urls ?? [],
720
699
  remote: raw.remote ?? false,
721
- apiUrl: raw.apiUrl,
722
- repoTasksPath: raw.repoTasksPath,
723
- taskSource: raw.taskSource,
724
700
  remoteCache: raw.remoteCache,
725
701
  config: raw.config,
726
- artifacts: raw.artifacts ?? true,
727
- artifactsDir: raw.artifactsDir,
728
- artifactsDryRun: raw.artifactsDryRun ?? false,
729
- artifactsExclude: raw.artifactsExclude,
702
+ artifactsWrite: raw.artifactsWrite ?? true,
730
703
  classification: raw.classification,
731
- ownerTeam: raw.ownerTeam,
732
- ownerIndividual: raw.ownerIndividual,
733
704
  purpose: raw.purpose,
734
705
  label: raw.label ?? [],
735
706
  };
@@ -742,7 +713,6 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
742
713
  compareThreshold: resolved.compareThreshold,
743
714
  concurrency: resolved.concurrency,
744
715
  debug: resolved.debug,
745
- discoveryReportEnabled: resolved.discoveryReportEnabled,
746
716
  dryRun: resolved.dryRun,
747
717
  gapAnalysisEnabled: resolved.gapAnalysisEnabled,
748
718
  graderReplications: resolved.graderReplications,
@@ -750,7 +720,6 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
750
720
  variant: resolved.variant,
751
721
  noCache: resolved.noCache,
752
722
  publishEnabled: resolved.publishEnabled,
753
- readinessEnabled: resolved.readinessEnabled,
754
723
  skipEval: resolved.skipEval,
755
724
  skipFetch: resolved.skipFetch,
756
725
  source: resolved.source,
@@ -50,8 +50,6 @@ async function executeFetchDocs(opts) {
50
50
  skipEval: true,
51
51
  compareEnabled: false,
52
52
  gapAnalysisEnabled: false,
53
- readinessEnabled: false,
54
- discoveryReportEnabled: false,
55
53
  publishEnabled: false,
56
54
  noCache: true,
57
55
  noRemoteCache: true,
@@ -28,8 +28,6 @@ export function createGenerateConfigsCommand() {
28
28
  skipEval: true,
29
29
  compareEnabled: false,
30
30
  gapAnalysisEnabled: false,
31
- readinessEnabled: false,
32
- discoveryReportEnabled: false,
33
31
  publishEnabled: false,
34
32
  noCache: true,
35
33
  noRemoteCache: true,