@sanity/ailf 3.6.0 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
  2. package/config/thresholds.ts +3 -3
  3. package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
  4. package/dist/_vendor/ailf-core/examples/index.js +2 -2
  5. package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
  6. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
  7. package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
  8. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
  9. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
  10. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
  11. package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
  12. package/dist/_vendor/ailf-shared/run-classification.js +1 -1
  13. package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
  14. package/dist/adapters/api-client/build-request.d.ts +0 -2
  15. package/dist/adapters/api-client/build-request.js +2 -6
  16. package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
  17. package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
  18. package/dist/adapters/config-sources/file-config-adapter.js +38 -12
  19. package/dist/adapters/task-sources/content-lake-task-source.js +17 -0
  20. package/dist/adapters/task-sources/index.d.ts +1 -1
  21. package/dist/adapters/task-sources/index.js +1 -1
  22. package/dist/adapters/task-sources/repo-schemas.d.ts +154 -0
  23. package/dist/adapters/task-sources/repo-schemas.js +137 -0
  24. package/dist/cli.d.ts +2 -2
  25. package/dist/cli.js +134 -38
  26. package/dist/commands/agent-report.js +1 -1
  27. package/dist/commands/calculate-scores.js +0 -2
  28. package/dist/commands/check-staleness.js +1 -1
  29. package/dist/commands/chronic-failures.js +4 -4
  30. package/dist/commands/coverage-audit.js +6 -7
  31. package/dist/commands/discovery-report.js +16 -4
  32. package/dist/commands/eval.d.ts +1 -1
  33. package/dist/commands/eval.js +1 -1
  34. package/dist/commands/explain-handler.d.ts +1 -1
  35. package/dist/commands/explain-handler.js +13 -44
  36. package/dist/commands/fetch-docs.js +0 -2
  37. package/dist/commands/generate-configs.js +0 -2
  38. package/dist/commands/grader/index.js +3 -3
  39. package/dist/commands/init.d.ts +2 -2
  40. package/dist/commands/init.js +10 -9
  41. package/dist/commands/interactive.d.ts +1 -1
  42. package/dist/commands/interactive.js +8 -8
  43. package/dist/commands/pipeline-action.d.ts +1 -3
  44. package/dist/commands/pipeline-action.js +174 -140
  45. package/dist/commands/pr-comment.js +1 -3
  46. package/dist/commands/publish.d.ts +1 -1
  47. package/dist/commands/publish.js +2 -4
  48. package/dist/commands/readiness-report.js +17 -8
  49. package/dist/commands/remote-pipeline.d.ts +1 -1
  50. package/dist/commands/remote-pipeline.js +1 -3
  51. package/dist/commands/run.d.ts +64 -0
  52. package/dist/commands/{pipeline.js → run.js} +19 -30
  53. package/dist/commands/shared/help.js +4 -4
  54. package/dist/commands/shared/options.d.ts +29 -3
  55. package/dist/commands/shared/options.js +37 -13
  56. package/dist/commands/validate-tasks.js +1 -1
  57. package/dist/commands/validate.d.ts +1 -1
  58. package/dist/commands/validate.js +2 -2
  59. package/dist/commands/weekly-digest.js +3 -3
  60. package/dist/config/thresholds.ts +3 -3
  61. package/dist/orchestration/build-app-context.js +0 -2
  62. package/dist/orchestration/build-step-sequence.js +1 -11
  63. package/dist/orchestration/steps/fetch-docs-step.js +1 -1
  64. package/dist/orchestration/steps/index.d.ts +0 -2
  65. package/dist/orchestration/steps/index.js +0 -2
  66. package/dist/orchestration/steps/run-eval-step.js +1 -1
  67. package/dist/pipeline/cache.d.ts +1 -1
  68. package/dist/pipeline/map-request-to-config.js +0 -2
  69. package/dist/pipeline/plan.d.ts +2 -4
  70. package/dist/pipeline/plan.js +4 -32
  71. package/dist/pipeline/run-context.d.ts +1 -1
  72. package/dist/pipeline/run-context.js +4 -4
  73. package/dist/pipeline/validate.d.ts +1 -1
  74. package/dist/pipeline/validate.js +1 -1
  75. package/package.json +7 -7
  76. package/dist/commands/pipeline.d.ts +0 -77
  77. package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
  78. package/dist/orchestration/steps/discovery-report-step.js +0 -62
  79. package/dist/orchestration/steps/readiness-step.d.ts +0 -13
  80. package/dist/orchestration/steps/readiness-step.js +0 -98
@@ -3,7 +3,7 @@
3
3
  *
4
4
  * When `ailf` is run with no arguments (or `ailf interactive`), this module
5
5
  * prompts the user through mode selection, area scoping, debug options,
6
- * and common flags — then builds and executes the equivalent `ailf pipeline`
6
+ * and common flags — then builds and executes the equivalent `ailf run`
7
7
  * command.
8
8
  *
9
9
  * Uses @inquirer/prompts for a clean, modern terminal UI.
@@ -3,7 +3,7 @@
3
3
  *
4
4
  * When `ailf` is run with no arguments (or `ailf interactive`), this module
5
5
  * prompts the user through mode selection, area scoping, debug options,
6
- * and common flags — then builds and executes the equivalent `ailf pipeline`
6
+ * and common flags — then builds and executes the equivalent `ailf run`
7
7
  * command.
8
8
  *
9
9
  * Uses @inquirer/prompts for a clean, modern terminal UI.
@@ -52,9 +52,9 @@ async function runInteractiveWizard() {
52
52
  const workflow = await select({
53
53
  choices: [
54
54
  {
55
- description: "Full evaluation pipeline (fetch → eval → score → report)",
56
- name: "Run pipeline",
57
- value: "pipeline",
55
+ description: "Full evaluation run (fetch → eval → score → report)",
56
+ name: "Run evaluation",
57
+ value: "run",
58
58
  },
59
59
  {
60
60
  description: "Compare current scores against a saved baseline",
@@ -193,21 +193,21 @@ async function runInteractiveWizard() {
193
193
  });
194
194
  if (debugStyle === "first-n") {
195
195
  const n = await input({ default: "5", message: "Number of tests:" });
196
- args.push("--debug-n", n);
196
+ args.push("--filter-first-n", n);
197
197
  }
198
198
  else if (debugStyle === "sample") {
199
199
  const n = await input({
200
200
  default: "3",
201
201
  message: "Sample size:",
202
202
  });
203
- args.push("--debug-sample", n);
203
+ args.push("--filter-sample", n);
204
204
  }
205
205
  else if (debugStyle === "pattern") {
206
206
  const pattern = await input({
207
207
  message: "Description regex (e.g. Blog, webhook):",
208
208
  });
209
209
  if (pattern.trim()) {
210
- args.push("--debug-pattern", pattern.trim());
210
+ args.push("--filter-pattern", pattern.trim());
211
211
  }
212
212
  }
213
213
  }
@@ -238,5 +238,5 @@ async function runInteractiveWizard() {
238
238
  args.push("--explain", "--yes");
239
239
  }
240
240
  }
241
- return { args, command: "pipeline" };
241
+ return { args, command: "run" };
242
242
  }
@@ -12,7 +12,7 @@
12
12
  */
13
13
  import { type ImpactSummary } from "../pipeline/reverse-mapping.js";
14
14
  import type { DebugOptions, EvalMode } from "../pipeline/types.js";
15
- import type { PipelineCliOptions } from "./pipeline.js";
15
+ import type { PipelineCliOptions } from "./run.js";
16
16
  export interface ResolvedOptions {
17
17
  allowedOriginArgs: string[];
18
18
  areaOption?: string;
@@ -24,7 +24,6 @@ export interface ResolvedOptions {
24
24
  concurrency?: number;
25
25
  datasetOverride?: string;
26
26
  debug?: DebugOptions;
27
- discoveryReportEnabled: boolean;
28
27
  dryRun: boolean;
29
28
  gapAnalysisEnabled: boolean;
30
29
  graderReplications?: number;
@@ -46,7 +45,6 @@ export interface ResolvedOptions {
46
45
  /** True when --publish or --no-publish was explicitly passed by the user. */
47
46
  publishExplicit: boolean;
48
47
  publishTag?: string;
49
- readinessEnabled: boolean;
50
48
  reportDataset?: string;
51
49
  reportProjectId?: string;
52
50
  sanityDocumentArgs: string[];
@@ -37,6 +37,10 @@ const VALID_SEARCH_MODES = ["open", "origin-only", "off"];
37
37
  export function computeResolvedOptions(opts) {
38
38
  // Resolve paths relative to the caller's cwd, not the eval package root
39
39
  const callerCwd = getCallerCwd();
40
+ // `.ailf/config.yaml` is the per-environment config home for `ailf run`
41
+ // (W0077 Phase 6a). Load early so downstream cascades (source, agentic,
42
+ // owner, output, etc.) can read from it.
43
+ const repoConfig = loadRepoConfigIfPresent(callerCwd);
40
44
  // Validate + normalize mode via the single boundary function.
41
45
  // normalizeMode() maps legacy variant names (baseline, agentic, etc.)
42
46
  // to canonical mode "literacy" + variant, and throws on invalid input.
@@ -59,34 +63,34 @@ export function computeResolvedOptions(opts) {
59
63
  console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);
60
64
  process.exit(1);
61
65
  }
62
- // Debug options — any sub-flag (--debug-n, --debug-pattern, --debug-sample)
63
- // implies --debug, so users don't need to pass both.
64
- // When DEBUG_EVAL is explicitly "0", ignore the sub-flags from env.
65
- // CLI flags (--debug-n, --debug-pattern, --debug-sample) always win.
66
+ // Debug + filter options — any filter flag (--filter-first-n,
67
+ // --filter-pattern, --filter-sample) implies --debug, so users don't
68
+ // need to pass both. When DEBUG_EVAL is explicitly "0", ignore the env
69
+ // vars. CLI flags always win over env vars.
66
70
  const debugEnvDisabled = process.env.DEBUG_EVAL === "0";
67
- const debugN = opts.debugN ??
71
+ const filterFirstN = opts.filterFirstN ??
68
72
  (process.env.DEBUG_EVAL_N && !debugEnvDisabled
69
73
  ? parseInt(process.env.DEBUG_EVAL_N, 10)
70
74
  : undefined);
71
- const debugPattern = opts.debugPattern ??
75
+ const filterPattern = opts.filterPattern ??
72
76
  (process.env.DEBUG_EVAL_PATTERN && !debugEnvDisabled
73
77
  ? process.env.DEBUG_EVAL_PATTERN
74
78
  : undefined);
75
- const debugSample = opts.debugSample ??
79
+ const filterSample = opts.filterSample ??
76
80
  (process.env.DEBUG_EVAL_SAMPLE && !debugEnvDisabled
77
81
  ? parseInt(process.env.DEBUG_EVAL_SAMPLE, 10)
78
82
  : undefined);
79
83
  const debugEnabled = opts.debug ||
80
84
  process.env.DEBUG_EVAL === "1" ||
81
- debugN !== undefined ||
82
- debugPattern !== undefined ||
83
- debugSample !== undefined;
85
+ filterFirstN !== undefined ||
86
+ filterPattern !== undefined ||
87
+ filterSample !== undefined;
84
88
  const debug = debugEnabled
85
89
  ? {
86
90
  enabled: true,
87
- firstN: debugN,
88
- pattern: debugPattern,
89
- sample: debugSample,
91
+ firstN: filterFirstN,
92
+ pattern: filterPattern,
93
+ sample: filterSample,
90
94
  }
91
95
  : undefined;
92
96
  // Search mode validation
@@ -95,16 +99,29 @@ export function computeResolvedOptions(opts) {
95
99
  console.error(`❌ Invalid --search mode "${searchMode}". Must be one of: ${VALID_SEARCH_MODES.join(", ")}`);
96
100
  process.exit(1);
97
101
  }
98
- // Merge repeatable args (singular + plural aliases)
99
- const urlArgs = [...opts.url, ...opts.urls];
100
- const headerArgs = [...opts.header, ...opts.headers];
101
- const allowedOriginArgs = [...opts.allowedOrigin, ...opts.allowedOrigins];
102
- const sanityDocumentArgs = [...opts.sanityDocument, ...opts.sanityDocuments];
103
- // Source overrides
104
- const datasetOverride = opts.sanityDataset;
105
- const projectIdOverride = opts.sanityProject;
102
+ // Merge repeatable args (singular + plural aliases). `headerArgs` and
103
+ // `allowedOriginArgs` are populated from `.ailf/config.yaml`'s `agentic`
104
+ // block (W0077 Phase 6f); the CLI flags `--header` and `--allowed-origin`
105
+ // were retired. The URL-classification block below may still append a
106
+ // host to `allowedOriginArgs` when neither config nor CLI provided one.
107
+ // The `DOC_HEADERS` and `DOC_ALLOWED_ORIGIN(S)` env vars still merge in
108
+ // sources.ts at the doc-fetch boundary (additive, unchanged).
109
+ const urlArgs = opts.url;
110
+ const headerArgs = [];
111
+ const allowedOriginArgs = [];
112
+ const sanityDocumentArgs = opts.sanityDocument;
113
+ if (repoConfig?.agentic?.headers) {
114
+ for (const [key, value] of Object.entries(repoConfig.agentic.headers)) {
115
+ headerArgs.push(`${key}: ${value}`);
116
+ }
117
+ }
118
+ if (repoConfig?.agentic?.allowedOrigins) {
119
+ allowedOriginArgs.push(...repoConfig.agentic.allowedOrigins);
120
+ }
121
+ // Source overrides — perspective stays per-run (CLI flag), the dataset /
122
+ // project / studio-origin trio moved into `.ailf/config.yaml`'s `source`
123
+ // block in W0077 Phase 6d. Env vars still win over the config-file value.
106
124
  const perspectiveOverride = opts.sanityPerspective;
107
- const studioOriginOverride = opts.sanityStudioOrigin;
108
125
  // URL classification (pure computation — results captured, not applied to env)
109
126
  if (urlArgs.length > 0) {
110
127
  const classification = classifyUrls(urlArgs);
@@ -115,21 +132,6 @@ export function computeResolvedOptions(opts) {
115
132
  sanityDocumentArgs.push(...merged);
116
133
  }
117
134
  }
118
- // Validate custom headers (early error)
119
- if (headerArgs.length > 0) {
120
- for (const h of headerArgs) {
121
- const colonIdx = h.indexOf(":");
122
- if (colonIdx === -1) {
123
- console.error(`❌ Invalid header format: "${h}". Expected "Key: Value".`);
124
- process.exit(1);
125
- }
126
- const key = h.slice(0, colonIdx).trim();
127
- if (!key) {
128
- console.error(`❌ Invalid header: empty key in "${h}"`);
129
- process.exit(1);
130
- }
131
- }
132
- }
133
135
  // Auto-infer allowed origin from --url
134
136
  if (urlArgs.length > 0 && allowedOriginArgs.length === 0) {
135
137
  try {
@@ -170,22 +172,48 @@ export function computeResolvedOptions(opts) {
170
172
  }
171
173
  }
172
174
  }
173
- // Comparison: --before auto-enables --compare
174
- const beforeOption = opts.before;
175
- const compareEnabled = opts.compare || beforeOption !== undefined;
176
- // Publish: smart default — auto-publish full runs when report store is configured
175
+ // Comparison: --before-source auto-enables --compare. The `--compare` flag
176
+ // is a Commander optional-argument: undefined when not passed, `true` for
177
+ // the bare flag (compare against latest), and a string path when the user
178
+ // pinned a specific baseline (`--compare path/to/baseline.json`).
179
+ const beforeOption = opts.beforeSource;
180
+ const compareEnabled = (opts.compare !== undefined && opts.compare !== false) ||
181
+ beforeOption !== undefined;
182
+ const compareBaseline = typeof opts.compare === "string" ? opts.compare : undefined;
183
+ // Task-source resolution (W0077 Phase 6h) — `--task-source` and
184
+ // `--repo-tasks-path` retired. Both move under `taskSource: {...}` in
185
+ // `.ailf/config.yaml`. Cascade: config → built-in default (content-lake).
186
+ // When type is `repo` and no path is set, fall back to `<cwd>/.ailf/tasks/`
187
+ // (the location `ailf init` scaffolds).
188
+ const resolvedTaskSourceType = resolveTaskSourceType(repoConfig?.taskSource?.type);
189
+ const resolvedRepoTasksPath = resolveRepoTasksPath(callerCwd, repoConfig?.taskSource?.repoTasksPath, resolvedTaskSourceType);
190
+ // Source overrides (W0077 Phase 6d) — `--sanity-dataset`, `--sanity-project`,
191
+ // and `--sanity-studio-origin` were retired from `ailf run`. Cascade is now:
192
+ // env var > .ailf/config.yaml `source.*` > built-in default (in sources.ts).
193
+ const datasetOverride = process.env.SANITY_DATASET ?? repoConfig?.source?.dataset;
194
+ const projectIdOverride = process.env.SANITY_PROJECT_ID ?? repoConfig?.source?.projectId;
195
+ const studioOriginOverride = process.env.SANITY_STUDIO_ORIGIN ?? repoConfig?.source?.studioOrigin;
196
+ // Report store overrides (W0077 Phase 6e — `--report-dataset` and
197
+ // `--report-project` retired). Resolution order:
198
+ // 1. Environment variables (AILF_REPORT_DATASET, AILF_REPORT_PROJECT_ID)
199
+ // 2. .ailf/config.yaml reportStore block
200
+ // 3. Eval dataset override (so perspective evals publish to the same dataset)
201
+ const reportDataset = process.env.AILF_REPORT_DATASET ??
202
+ repoConfig?.reportStore?.dataset ??
203
+ datasetOverride ??
204
+ undefined;
205
+ const reportProjectId = process.env.AILF_REPORT_PROJECT_ID ??
206
+ repoConfig?.reportStore?.projectId ??
207
+ undefined;
208
+ // Publish polarity (W0077 Phase 4) — auto policy lives in
209
+ // .ailf/config.yaml's `publish.auto` (or env / default). CLI flags and
210
+ // AILF_PUBLISH still override the policy.
177
211
  const reportStoreToken = process.env.AILF_REPORT_SANITY_API_TOKEN ?? process.env.SANITY_API_TOKEN;
178
212
  const reportStoreConfigured = Boolean(reportStoreToken);
179
- // Track whether the user explicitly chose --publish or --no-publish.
180
- // In remote mode, when this is false we omit the field from the API
181
- // request so the server can apply its own default (publish when jobId
182
- // is present). Without this, the local smart-default (which checks for
183
- // a local Sanity token the CLI doesn't have) would send publish:false
184
- // and suppress server-side report publishing.
185
213
  const publishExplicit = opts.publish !== undefined || process.env.AILF_PUBLISH !== undefined;
214
+ const publishAuto = resolvePublishAuto(repoConfig?.publish?.auto);
186
215
  let publishEnabled;
187
216
  if (opts.publish !== undefined) {
188
- // Explicit --publish or --no-publish always wins
189
217
  publishEnabled = opts.publish;
190
218
  }
191
219
  else if (process.env.AILF_PUBLISH === "1") {
@@ -195,36 +223,40 @@ export function computeResolvedOptions(opts) {
195
223
  publishEnabled = false;
196
224
  }
197
225
  else {
198
- // Smart default: full runs auto-publish when store is configured
199
- publishEnabled = reportStoreConfigured && !debugEnabled;
226
+ // Apply the auto policy. The report store still has to be configured
227
+ // for `auto: "always"` and `"full-runs"` — without a token, publishing
228
+ // is impossible regardless of policy.
229
+ switch (publishAuto) {
230
+ case "always":
231
+ publishEnabled = reportStoreConfigured;
232
+ break;
233
+ case "never":
234
+ publishEnabled = false;
235
+ break;
236
+ case "full-runs":
237
+ default:
238
+ publishEnabled = reportStoreConfigured && !debugEnabled;
239
+ break;
240
+ }
200
241
  }
201
- // Resolve task source + repo tasks path before anything that depends on
202
- // them (report store overrides, output dir). When --task-source=repo is
203
- // set without --repo-tasks-path, default to ./.ailf/tasks/ — the location
204
- // created by `ailf init`.
205
- const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
206
- const resolvedRepoTasksPath = resolveRepoTasksPath(callerCwd, opts.repoTasksPath, resolvedTaskSourceType);
207
- // Report store overrides — resolution order:
208
- // 1. Explicit CLI flags (--report-dataset, --report-project)
209
- // 2. Environment variables (AILF_REPORT_DATASET, AILF_REPORT_PROJECT_ID)
210
- // 3. .ailf/config.yaml reportStore block (when repo tasks path is set)
211
- // 4. Eval dataset override (so perspective evals publish to the same dataset)
212
- const repoConfig = loadRepoConfigIfPresent(resolvedRepoTasksPath);
213
- const reportDataset = opts.reportDataset ??
214
- process.env.AILF_REPORT_DATASET ??
215
- repoConfig?.reportStore?.dataset ??
216
- datasetOverride ??
217
- undefined;
218
- const reportProjectId = opts.reportProject ??
219
- process.env.AILF_REPORT_PROJECT_ID ??
220
- repoConfig?.reportStore?.projectId ??
221
- undefined;
242
+ // Tag default cascade: --publish-tag > AILF_PUBLISH_TAG > .ailf/config.yaml
243
+ const publishTag = opts.publishTag ?? process.env.AILF_PUBLISH_TAG ?? repoConfig?.publish?.tag;
244
+ // Execution-tier resolution (W0077 Phase 6b) — concurrency, grader
245
+ // replications, gap analysis, and api URL all moved from CLI flags to
246
+ // `.ailf/config.yaml`'s `execution` block. Cascade for each:
247
+ // env var (where one exists) > .ailf/config.yaml > built-in default
248
+ const concurrency = repoConfig?.execution?.concurrency;
249
+ const graderReplications = repoConfig?.execution?.graderReplications;
250
+ const gapAnalysisEnabled = repoConfig?.execution?.gapAnalysis ?? true;
222
251
  // Remote mode
223
252
  const remote = opts.remote || process.env.AILF_REMOTE === "1";
224
- const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
253
+ const apiUrl = process.env.AILF_API_URL ??
254
+ repoConfig?.execution?.apiUrl ??
255
+ "https://ailf-api.sanity.build";
225
256
  const apiKey = process.env.AILF_API_KEY ?? undefined;
226
- // Output directory: explicit --output-dir > $CWD/.ailf/results/latest/
227
- const outputDir = resolveOutputDir(opts.outputDir);
257
+ // Output directory (W0077 Phase 6c) — `output.dir` from .ailf/config.yaml
258
+ // when set, otherwise <cwd>/.ailf/results/latest/.
259
+ const outputDir = resolveOutputDir(repoConfig?.output?.dir);
228
260
  return {
229
261
  allowedOriginArgs,
230
262
  apiKey,
@@ -232,16 +264,15 @@ export function computeResolvedOptions(opts) {
232
264
  areaOption,
233
265
  beforeOption,
234
266
  changedDocsOption,
235
- compareBaseline: opts.compareBaseline,
267
+ compareBaseline,
236
268
  compareEnabled,
237
269
  compareThreshold: opts.threshold,
238
- concurrency: opts.concurrency,
270
+ concurrency,
239
271
  datasetOverride,
240
272
  debug,
241
- discoveryReportEnabled: opts.discoveryReport,
242
273
  dryRun: opts.dryRun,
243
- gapAnalysisEnabled: opts.gapAnalysis,
244
- graderReplications: opts.graderReplications,
274
+ gapAnalysisEnabled,
275
+ graderReplications,
245
276
  headerArgs,
246
277
  impactSummary,
247
278
  mode,
@@ -256,15 +287,14 @@ export function computeResolvedOptions(opts) {
256
287
  promptfooUrl: opts.promptfooUrl,
257
288
  publishEnabled,
258
289
  publishExplicit,
259
- publishTag: opts.publishTag,
260
- readinessEnabled: opts.readiness,
290
+ publishTag,
261
291
  remote,
262
292
  reportDataset,
263
293
  reportProjectId,
264
294
  sanityDocumentArgs,
265
295
  searchMode,
266
- skipEval: opts.skipEval,
267
- skipFetch: opts.skipFetch,
296
+ skipEval: opts.eval === false,
297
+ skipFetch: opts.fetch === false,
268
298
  source: opts.source,
269
299
  studioOriginOverride,
270
300
  repoTasksPath: resolvedRepoTasksPath,
@@ -272,37 +302,55 @@ export function computeResolvedOptions(opts) {
272
302
  tagOption,
273
303
  taskSourceType: resolvedTaskSourceType,
274
304
  urlArgs,
275
- artifactsDisabled: opts.artifacts === false,
276
- artifactsDir: resolveArtifactsDir(opts),
277
- artifactsDryRun: opts.artifactsDryRun,
278
- artifactsExclude: parseArtifactsExcludeList(opts.artifactsExclude),
305
+ // Artifact-writer settings (W0077 Phase 6g) — `--no-artifacts`,
306
+ // `--artifacts-dir`, and `--artifacts-exclude` retired. Cascade:
307
+ // AILF_ARTIFACTS_DIR > .ailf/config.yaml `artifacts.dir` > default
308
+ // .ailf/config.yaml `artifacts.enabled: false` > writers attached
309
+ // .ailf/config.yaml `artifacts.exclude` > no exclusions
310
+ // `--no-artifacts-write` (artifactsDryRun) stays per-run.
311
+ artifactsDisabled: repoConfig?.artifacts?.enabled === false,
312
+ artifactsDir: process.env.AILF_ARTIFACTS_DIR ?? repoConfig?.artifacts?.dir,
313
+ artifactsDryRun: opts.artifactsWrite === false,
314
+ artifactsExclude: repoConfig?.artifacts?.exclude,
279
315
  classificationOption: opts.classification?.trim() || undefined,
280
- ownerTeamOption: opts.ownerTeam?.trim() || undefined,
281
- ownerIndividualOption: opts.ownerIndividual?.trim() || undefined,
316
+ // Owner attribution (W0077 Phase 6f) — `--owner-team` and
317
+ // `--owner-individual` retired. Cascade: AILF_OWNER_TEAM /
318
+ // AILF_OWNER_INDIVIDUAL env vars > .ailf/config.yaml `owner.*` > undefined.
319
+ // Downstream resolution in build-request.ts already honors the env var as a
320
+ // fallback when this option is unset, but threading it through here keeps
321
+ // the cascade order explicit.
322
+ ownerTeamOption: process.env.AILF_OWNER_TEAM?.trim() ||
323
+ repoConfig?.owner?.team ||
324
+ undefined,
325
+ ownerIndividualOption: process.env.AILF_OWNER_INDIVIDUAL?.trim() ||
326
+ repoConfig?.owner?.individual ||
327
+ undefined,
282
328
  purposeOption: opts.purpose?.trim() || undefined,
283
329
  labelOptions: opts.label ?? [],
284
330
  };
285
331
  }
332
+ const PUBLISH_AUTO_VALUES = ["always", "full-runs", "never"];
286
333
  /**
287
- * Resolve the artifacts output directory from CLI flags and env vars.
288
- * Precedence (highest first):
289
- * 1. `--artifacts-dir` flag
290
- * 2. `AILF_ARTIFACTS_DIR` env var
334
+ * Resolve the publish auto policy. Precedence:
335
+ * 1. .ailf/config.yaml `publish.auto`
336
+ * 2. AILF_PUBLISH_AUTO env var
337
+ * 3. Default: "full-runs" (preserves the historical smart default)
291
338
  *
292
- * The `--capture-dir` / `AILF_CAPTURE_DIR` aliases were retired in W0052;
293
- * callers of those names are rejected at CLI entry (see cli.ts).
339
+ * Unrecognized env-var values fall through to the default with a warning;
340
+ * the schema validates the config-file value at parse time.
294
341
  */
295
- function resolveArtifactsDir(opts) {
296
- return opts.artifactsDir ?? process.env.AILF_ARTIFACTS_DIR;
297
- }
298
- function parseArtifactsExcludeList(raw) {
299
- if (!raw)
300
- return undefined;
301
- const list = raw
302
- .split(",")
303
- .map((s) => s.trim())
304
- .filter(Boolean);
305
- return list.length > 0 ? list : undefined;
342
+ function resolvePublishAuto(repoValue) {
343
+ if (repoValue)
344
+ return repoValue;
345
+ const envValue = process.env.AILF_PUBLISH_AUTO?.trim();
346
+ if (envValue &&
347
+ PUBLISH_AUTO_VALUES.includes(envValue)) {
348
+ return envValue;
349
+ }
350
+ if (envValue) {
351
+ console.warn(`⚠️ AILF_PUBLISH_AUTO="${envValue}" is not recognized; valid values are ${PUBLISH_AUTO_VALUES.join(", ")}. Falling back to "full-runs".`);
352
+ }
353
+ return "full-runs";
306
354
  }
307
355
  /** Resolve and validate the --task-source flag value. */
308
356
  function resolveTaskSourceType(raw) {
@@ -370,18 +418,11 @@ export async function executePipeline(cliOpts) {
370
418
  const callerCwd = getCallerCwd();
371
419
  const adapter = new FileConfigAdapter(cliOpts.config, ROOT);
372
420
  const config = await adapter.resolve();
373
- // Merge CLI-only flags that aren't in the config file.
374
- // The file config (from the API payload) has taskSourceType and other
375
- // pipeline options, but CLI-only args like --repo-tasks-path and
376
- // --output are only available from the command line.
377
- if (cliOpts.repoTasksPath) {
378
- config.repoTasksPath = resolve(callerCwd, cliOpts.repoTasksPath);
379
- }
380
- else if (config.taskSourceType === "repo" && !config.repoTasksPath) {
381
- // Default: when taskSource=repo but no path set, look in .ailf/tasks/
382
- // (matches the `ailf init` scaffold location). Silent fallback here —
383
- // composition root will surface a helpful error if the directory is
384
- // missing.
421
+ // When `taskSource.type` is `repo` and no `repoTasksPath` was set in
422
+ // the config file, fall back to `<callerCwd>/.ailf/tasks/` (the
423
+ // location `ailf init` scaffolds). Silent fallback — composition-root
424
+ // surfaces a helpful error if the directory is missing.
425
+ if (config.taskSourceType === "repo" && !config.repoTasksPath) {
385
426
  const defaultPath = resolve(callerCwd, ".ailf", "tasks");
386
427
  if (existsSync(defaultPath)) {
387
428
  config.repoTasksPath = defaultPath;
@@ -390,18 +431,13 @@ export async function executePipeline(cliOpts) {
390
431
  if (cliOpts.output) {
391
432
  config.outputPath = resolve(callerCwd, cliOpts.output);
392
433
  }
393
- // Output dir: explicit CLI flag > $CWD/.ailf/results/latest/
394
- config.outputDir = resolveOutputDir(cliOpts.outputDir);
395
- // Artifact options — CLI flags and env vars aren't in the config file,
396
- // so merge them here (same logic as resolveOptions).
397
- const resolvedArtifactsDir = resolveArtifactsDir(cliOpts);
398
- config.artifactsDisabled ??= cliOpts.artifacts === false;
399
- config.artifactsDir ??= resolvedArtifactsDir;
400
- config.artifactsDryRun ??= cliOpts.artifactsDryRun;
401
- const excludeList = parseArtifactsExcludeList(cliOpts.artifactsExclude);
402
- if (excludeList) {
403
- config.artifactsExclude = excludeList;
404
- }
434
+ // Artifact-writer env-var fallbacks. The adapter populates the bulk of
435
+ // the artifact settings from `EvalConfigSchema.artifacts.*` (W0077
436
+ // Phase 6g); we layer the env-var fallbacks here for fields the schema
437
+ // doesn't cover (GCS bucket, upload mode), and the AILF_ARTIFACTS_DIR
438
+ // override that wins over both schema and CLI.
439
+ config.artifactsDir = process.env.AILF_ARTIFACTS_DIR ?? config.artifactsDir;
440
+ config.artifactsDryRun ??= cliOpts.artifactsWrite === false;
405
441
  config.artifactGcsBucket ??= process.env.AILF_GCS_ARTIFACT_BUCKET;
406
442
  config.artifactUpload ??= parseArtifactUploadEnv(process.env.AILF_ARTIFACT_UPLOAD);
407
443
  // Create AppContext directly from the merged config so adapters
@@ -470,18 +506,16 @@ function writePipelineResult(result, outputDir) {
470
506
  console.log(` 📄 Pipeline result: ${resultFile}\n`);
471
507
  }
472
508
  /**
473
- * Load .ailf/config.yaml if --repo-tasks-path is set and the config file
474
- * exists. Returns null if not applicable.
509
+ * Load `<cwd>/.ailf/config.yaml` if it exists. Returns null when the file
510
+ * is absent or unparseable.
475
511
  *
476
- * The config.yaml lives one level up from the tasks/ directory:
477
- * .ailf/config.yaml ← config
478
- * .ailf/tasks/ ← repoTasksPath
512
+ * Auto-loads regardless of `--task-source`: the same `.ailf/config.yaml` is
513
+ * the per-environment configuration home for every run (W0077 Phase 6a).
514
+ * Subsequent flag-family migrations (6b–6h) read additional fields from
515
+ * this same file via the same loader.
479
516
  */
480
- function loadRepoConfigIfPresent(repoTasksPath) {
481
- if (!repoTasksPath)
482
- return null;
483
- // .ailf/tasks/ → .ailf/config.yaml
484
- const configPath = resolve(repoTasksPath, "..", "config.yaml");
517
+ function loadRepoConfigIfPresent(cwd) {
518
+ const configPath = resolve(cwd, ".ailf", "config.yaml");
485
519
  if (!existsSync(configPath))
486
520
  return null;
487
521
  try {
@@ -16,7 +16,7 @@ const ROOT = resolve(__dirname, "..", "..");
16
16
  export function createPrCommentCommand() {
17
17
  const cmd = new Command("pr-comment")
18
18
  .description("Generate a markdown PR comment from evaluation scores")
19
- .option("--output <path>", "Write comment to file (default: stdout)")
19
+ .option("-o, --output <path>", "Write comment to file (default: stdout)")
20
20
  .option("--promptfoo-url <url>", "Promptfoo share URL to include")
21
21
  .action(async (opts) => {
22
22
  try {
@@ -29,8 +29,6 @@ export function createPrCommentCommand() {
29
29
  skipEval: true,
30
30
  compareEnabled: false,
31
31
  gapAnalysisEnabled: false,
32
- readinessEnabled: false,
33
- discoveryReportEnabled: false,
34
32
  publishEnabled: false,
35
33
  noCache: true,
36
34
  noRemoteCache: true,
@@ -4,7 +4,7 @@
4
4
  *
5
5
  * Reads a score-summary.json (defaulting to results/latest/score-summary.json),
6
6
  * builds provenance, writes the report to Sanity, and fans out to configured
7
- * sinks — exactly the same as the publish step in `ailf pipeline`, but
7
+ * sinks — exactly the same as the publish step in `ailf run`, but
8
8
  * standalone.
9
9
  *
10
10
  * Uses createAppContext() (composition root) for all infrastructure access.
@@ -4,7 +4,7 @@
4
4
  *
5
5
  * Reads a score-summary.json (defaulting to results/latest/score-summary.json),
6
6
  * builds provenance, writes the report to Sanity, and fans out to configured
7
- * sinks — exactly the same as the publish step in `ailf pipeline`, but
7
+ * sinks — exactly the same as the publish step in `ailf run`, but
8
8
  * standalone.
9
9
  *
10
10
  * Uses createAppContext() (composition root) for all infrastructure access.
@@ -88,7 +88,6 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
88
88
  // Wire up infrastructure via composition root
89
89
  const ctx = createAppContext({
90
90
  compareEnabled: false,
91
- discoveryReportEnabled: false,
92
91
  gapAnalysisEnabled: false,
93
92
  mode: "literacy",
94
93
  noAutoScope: false,
@@ -97,7 +96,6 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
97
96
  outputDir,
98
97
  publishEnabled: true,
99
98
  publishTag: opts.tag,
100
- readinessEnabled: false,
101
99
  rootDir: ROOT,
102
100
  searchMode: "open",
103
101
  skipEval: true,
@@ -117,7 +115,7 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
117
115
  if (!existsSync(resolvedPath)) {
118
116
  console.error(` ✖ File not found: ${resolvedPath}`);
119
117
  console.error();
120
- console.error(" Hint: Run `ailf pipeline` first to generate results,");
118
+ console.error(" Hint: Run `ailf run` first to generate results,");
121
119
  console.error(" or provide a path to an existing score-summary.json.");
122
120
  process.exit(1);
123
121
  }
@@ -15,23 +15,32 @@ import { formatReadinessMarkdown, generateReadinessReport, } from "../pipeline/r
15
15
  import { ThresholdConfigSchema, } from "../pipeline/schemas.js";
16
16
  const __dirname = dirname(fileURLToPath(import.meta.url));
17
17
  const ROOT = resolve(__dirname, "..", "..");
18
- const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
19
- const GAP_ANALYSIS_PATH = join(ROOT, "results", "latest", "gap-analysis.json");
18
+ const DEFAULT_RESULTS_DIR = join(ROOT, "results", "latest");
20
19
  // thresholds loaded via loadConfigFile below
21
20
  const BASELINES_DIR = join(ROOT, "results", "baselines");
21
+ /** Resolve `--from-run` to an absolute results directory. */
22
+ function resolveFromRun(value) {
23
+ if (value === "latest")
24
+ return DEFAULT_RESULTS_DIR;
25
+ return resolve(value);
26
+ }
22
27
  export function createReadinessReportCommand() {
23
- return new Command("readiness-report")
28
+ return new Command("readiness")
24
29
  .description("Generate launch readiness report for a feature area")
25
30
  .requiredOption("-a, --area <area>", "Feature area to evaluate (required)")
31
+ .option("--from-run <path>", "Results directory to read from (`latest` or a path to a results directory containing score-summary.json)", "latest")
26
32
  .option("-H, --history", "Include historical progress from baselines", false)
27
33
  .option("-o, --output <path>", "Write markdown to file instead of stdout")
28
34
  .action(async (opts) => {
35
+ const resultsDir = resolveFromRun(opts.fromRun);
36
+ const scoreSummaryPath = join(resultsDir, "score-summary.json");
37
+ const gapAnalysisPath = join(resultsDir, "gap-analysis.json");
29
38
  // Load score summary
30
- if (!existsSync(SCORE_SUMMARY_PATH)) {
31
- console.error(`❌ Score summary not found at ${SCORE_SUMMARY_PATH}. Run \`pnpm pipeline\` first.`);
39
+ if (!existsSync(scoreSummaryPath)) {
40
+ console.error(`❌ Score summary not found at ${scoreSummaryPath}. Run \`ailf run\` first or pass --from-run <path>.`);
32
41
  process.exit(1);
33
42
  }
34
- const scoreSummary = JSON.parse(readFileSync(SCORE_SUMMARY_PATH, "utf-8"));
43
+ const scoreSummary = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
35
44
  // Load threshold config
36
45
  let parsedThresholds;
37
46
  try {
@@ -57,8 +66,8 @@ export function createReadinessReportCommand() {
57
66
  const thresholdConfig = thresholdResult.data;
58
67
  // Load gap analysis (optional)
59
68
  let gapAnalysis;
60
- if (existsSync(GAP_ANALYSIS_PATH)) {
61
- gapAnalysis = JSON.parse(readFileSync(GAP_ANALYSIS_PATH, "utf-8"));
69
+ if (existsSync(gapAnalysisPath)) {
70
+ gapAnalysis = JSON.parse(readFileSync(gapAnalysisPath, "utf-8"));
62
71
  }
63
72
  const history = [];
64
73
  if (opts.history && existsSync(BASELINES_DIR)) {