@sanity/ailf 3.7.0 → 3.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
- package/config/thresholds.ts +3 -3
- package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
- package/dist/_vendor/ailf-core/examples/index.js +2 -2
- package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
- package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
- package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
- package/dist/_vendor/ailf-shared/run-classification.js +1 -1
- package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
- package/dist/adapters/api-client/build-request.d.ts +0 -2
- package/dist/adapters/api-client/build-request.js +2 -6
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
- package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
- package/dist/adapters/config-sources/file-config-adapter.js +38 -12
- package/dist/adapters/task-sources/repo-schemas.d.ts +38 -0
- package/dist/adapters/task-sources/repo-schemas.js +127 -0
- package/dist/cli.d.ts +2 -2
- package/dist/cli.js +134 -38
- package/dist/commands/agent-report.js +1 -1
- package/dist/commands/calculate-scores.js +0 -2
- package/dist/commands/check-staleness.js +1 -1
- package/dist/commands/chronic-failures.js +4 -4
- package/dist/commands/coverage-audit.js +6 -7
- package/dist/commands/discovery-report.js +16 -4
- package/dist/commands/eval.d.ts +1 -1
- package/dist/commands/eval.js +1 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +13 -44
- package/dist/commands/fetch-docs.js +0 -2
- package/dist/commands/generate-configs.js +0 -2
- package/dist/commands/grader/index.js +3 -3
- package/dist/commands/init.d.ts +2 -2
- package/dist/commands/init.js +10 -9
- package/dist/commands/interactive.d.ts +1 -1
- package/dist/commands/interactive.js +8 -8
- package/dist/commands/pipeline-action.d.ts +1 -3
- package/dist/commands/pipeline-action.js +174 -140
- package/dist/commands/pr-comment.js +1 -3
- package/dist/commands/publish.d.ts +1 -1
- package/dist/commands/publish.js +2 -4
- package/dist/commands/readiness-report.js +17 -8
- package/dist/commands/remote-pipeline.d.ts +1 -1
- package/dist/commands/remote-pipeline.js +1 -3
- package/dist/commands/run.d.ts +64 -0
- package/dist/commands/{pipeline.js → run.js} +19 -30
- package/dist/commands/shared/help.js +4 -4
- package/dist/commands/shared/options.d.ts +29 -3
- package/dist/commands/shared/options.js +37 -13
- package/dist/commands/validate-tasks.js +1 -1
- package/dist/commands/validate.d.ts +1 -1
- package/dist/commands/validate.js +2 -2
- package/dist/commands/weekly-digest.js +3 -3
- package/dist/config/thresholds.ts +3 -3
- package/dist/orchestration/build-app-context.js +0 -2
- package/dist/orchestration/build-step-sequence.js +1 -11
- package/dist/orchestration/steps/fetch-docs-step.js +1 -1
- package/dist/orchestration/steps/index.d.ts +0 -2
- package/dist/orchestration/steps/index.js +0 -2
- package/dist/orchestration/steps/run-eval-step.js +1 -1
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/map-request-to-config.js +0 -2
- package/dist/pipeline/plan.d.ts +2 -4
- package/dist/pipeline/plan.js +4 -32
- package/dist/pipeline/run-context.d.ts +1 -1
- package/dist/pipeline/run-context.js +4 -4
- package/dist/pipeline/validate.d.ts +1 -1
- package/dist/pipeline/validate.js +1 -1
- package/package.json +7 -7
- package/dist/commands/pipeline.d.ts +0 -77
- package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
- package/dist/orchestration/steps/discovery-report-step.js +0 -62
- package/dist/orchestration/steps/readiness-step.d.ts +0 -13
- package/dist/orchestration/steps/readiness-step.js +0 -98
|
@@ -37,6 +37,10 @@ const VALID_SEARCH_MODES = ["open", "origin-only", "off"];
|
|
|
37
37
|
export function computeResolvedOptions(opts) {
|
|
38
38
|
// Resolve paths relative to the caller's cwd, not the eval package root
|
|
39
39
|
const callerCwd = getCallerCwd();
|
|
40
|
+
// `.ailf/config.yaml` is the per-environment config home for `ailf run`
|
|
41
|
+
// (W0077 Phase 6a). Load early so downstream cascades (source, agentic,
|
|
42
|
+
// owner, output, etc.) can read from it.
|
|
43
|
+
const repoConfig = loadRepoConfigIfPresent(callerCwd);
|
|
40
44
|
// Validate + normalize mode via the single boundary function.
|
|
41
45
|
// normalizeMode() maps legacy variant names (baseline, agentic, etc.)
|
|
42
46
|
// to canonical mode "literacy" + variant, and throws on invalid input.
|
|
@@ -59,34 +63,34 @@ export function computeResolvedOptions(opts) {
|
|
|
59
63
|
console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);
|
|
60
64
|
process.exit(1);
|
|
61
65
|
}
|
|
62
|
-
// Debug options — any
|
|
63
|
-
// implies --debug, so users don't
|
|
64
|
-
// When DEBUG_EVAL is explicitly "0", ignore the
|
|
65
|
-
// CLI flags
|
|
66
|
+
// Debug + filter options — any filter flag (--filter-first-n,
|
|
67
|
+
// --filter-pattern, --filter-sample) implies --debug, so users don't
|
|
68
|
+
// need to pass both. When DEBUG_EVAL is explicitly "0", ignore the env
|
|
69
|
+
// vars. CLI flags always win over env vars.
|
|
66
70
|
const debugEnvDisabled = process.env.DEBUG_EVAL === "0";
|
|
67
|
-
const
|
|
71
|
+
const filterFirstN = opts.filterFirstN ??
|
|
68
72
|
(process.env.DEBUG_EVAL_N && !debugEnvDisabled
|
|
69
73
|
? parseInt(process.env.DEBUG_EVAL_N, 10)
|
|
70
74
|
: undefined);
|
|
71
|
-
const
|
|
75
|
+
const filterPattern = opts.filterPattern ??
|
|
72
76
|
(process.env.DEBUG_EVAL_PATTERN && !debugEnvDisabled
|
|
73
77
|
? process.env.DEBUG_EVAL_PATTERN
|
|
74
78
|
: undefined);
|
|
75
|
-
const
|
|
79
|
+
const filterSample = opts.filterSample ??
|
|
76
80
|
(process.env.DEBUG_EVAL_SAMPLE && !debugEnvDisabled
|
|
77
81
|
? parseInt(process.env.DEBUG_EVAL_SAMPLE, 10)
|
|
78
82
|
: undefined);
|
|
79
83
|
const debugEnabled = opts.debug ||
|
|
80
84
|
process.env.DEBUG_EVAL === "1" ||
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
85
|
+
filterFirstN !== undefined ||
|
|
86
|
+
filterPattern !== undefined ||
|
|
87
|
+
filterSample !== undefined;
|
|
84
88
|
const debug = debugEnabled
|
|
85
89
|
? {
|
|
86
90
|
enabled: true,
|
|
87
|
-
firstN:
|
|
88
|
-
pattern:
|
|
89
|
-
sample:
|
|
91
|
+
firstN: filterFirstN,
|
|
92
|
+
pattern: filterPattern,
|
|
93
|
+
sample: filterSample,
|
|
90
94
|
}
|
|
91
95
|
: undefined;
|
|
92
96
|
// Search mode validation
|
|
@@ -95,16 +99,29 @@ export function computeResolvedOptions(opts) {
|
|
|
95
99
|
console.error(`❌ Invalid --search mode "${searchMode}". Must be one of: ${VALID_SEARCH_MODES.join(", ")}`);
|
|
96
100
|
process.exit(1);
|
|
97
101
|
}
|
|
98
|
-
// Merge repeatable args (singular + plural aliases)
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
//
|
|
104
|
-
|
|
105
|
-
const
|
|
102
|
+
// Merge repeatable args (singular + plural aliases). `headerArgs` and
|
|
103
|
+
// `allowedOriginArgs` are populated from `.ailf/config.yaml`'s `agentic`
|
|
104
|
+
// block (W0077 Phase 6f); the CLI flags `--header` and `--allowed-origin`
|
|
105
|
+
// were retired. The URL-classification block below may still append a
|
|
106
|
+
// host to `allowedOriginArgs` when neither config nor CLI provided one.
|
|
107
|
+
// The `DOC_HEADERS` and `DOC_ALLOWED_ORIGIN(S)` env vars still merge in
|
|
108
|
+
// sources.ts at the doc-fetch boundary (additive, unchanged).
|
|
109
|
+
const urlArgs = opts.url;
|
|
110
|
+
const headerArgs = [];
|
|
111
|
+
const allowedOriginArgs = [];
|
|
112
|
+
const sanityDocumentArgs = opts.sanityDocument;
|
|
113
|
+
if (repoConfig?.agentic?.headers) {
|
|
114
|
+
for (const [key, value] of Object.entries(repoConfig.agentic.headers)) {
|
|
115
|
+
headerArgs.push(`${key}: ${value}`);
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
if (repoConfig?.agentic?.allowedOrigins) {
|
|
119
|
+
allowedOriginArgs.push(...repoConfig.agentic.allowedOrigins);
|
|
120
|
+
}
|
|
121
|
+
// Source overrides — perspective stays per-run (CLI flag), the dataset /
|
|
122
|
+
// project / studio-origin trio moved into `.ailf/config.yaml`'s `source`
|
|
123
|
+
// block in W0077 Phase 6d. Env vars still win over the config-file value.
|
|
106
124
|
const perspectiveOverride = opts.sanityPerspective;
|
|
107
|
-
const studioOriginOverride = opts.sanityStudioOrigin;
|
|
108
125
|
// URL classification (pure computation — results captured, not applied to env)
|
|
109
126
|
if (urlArgs.length > 0) {
|
|
110
127
|
const classification = classifyUrls(urlArgs);
|
|
@@ -115,21 +132,6 @@ export function computeResolvedOptions(opts) {
|
|
|
115
132
|
sanityDocumentArgs.push(...merged);
|
|
116
133
|
}
|
|
117
134
|
}
|
|
118
|
-
// Validate custom headers (early error)
|
|
119
|
-
if (headerArgs.length > 0) {
|
|
120
|
-
for (const h of headerArgs) {
|
|
121
|
-
const colonIdx = h.indexOf(":");
|
|
122
|
-
if (colonIdx === -1) {
|
|
123
|
-
console.error(`❌ Invalid header format: "${h}". Expected "Key: Value".`);
|
|
124
|
-
process.exit(1);
|
|
125
|
-
}
|
|
126
|
-
const key = h.slice(0, colonIdx).trim();
|
|
127
|
-
if (!key) {
|
|
128
|
-
console.error(`❌ Invalid header: empty key in "${h}"`);
|
|
129
|
-
process.exit(1);
|
|
130
|
-
}
|
|
131
|
-
}
|
|
132
|
-
}
|
|
133
135
|
// Auto-infer allowed origin from --url
|
|
134
136
|
if (urlArgs.length > 0 && allowedOriginArgs.length === 0) {
|
|
135
137
|
try {
|
|
@@ -170,22 +172,48 @@ export function computeResolvedOptions(opts) {
|
|
|
170
172
|
}
|
|
171
173
|
}
|
|
172
174
|
}
|
|
173
|
-
// Comparison: --before auto-enables --compare
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
//
|
|
175
|
+
// Comparison: --before-source auto-enables --compare. The `--compare` flag
|
|
176
|
+
// is a Commander optional-argument: undefined when not passed, `true` for
|
|
177
|
+
// the bare flag (compare against latest), and a string path when the user
|
|
178
|
+
// pinned a specific baseline (`--compare path/to/baseline.json`).
|
|
179
|
+
const beforeOption = opts.beforeSource;
|
|
180
|
+
const compareEnabled = (opts.compare !== undefined && opts.compare !== false) ||
|
|
181
|
+
beforeOption !== undefined;
|
|
182
|
+
const compareBaseline = typeof opts.compare === "string" ? opts.compare : undefined;
|
|
183
|
+
// Task-source resolution (W0077 Phase 6h) — `--task-source` and
|
|
184
|
+
// `--repo-tasks-path` retired. Both move under `taskSource: {...}` in
|
|
185
|
+
// `.ailf/config.yaml`. Cascade: config → built-in default (content-lake).
|
|
186
|
+
// When type is `repo` and no path is set, fall back to `<cwd>/.ailf/tasks/`
|
|
187
|
+
// (the location `ailf init` scaffolds).
|
|
188
|
+
const resolvedTaskSourceType = resolveTaskSourceType(repoConfig?.taskSource?.type);
|
|
189
|
+
const resolvedRepoTasksPath = resolveRepoTasksPath(callerCwd, repoConfig?.taskSource?.repoTasksPath, resolvedTaskSourceType);
|
|
190
|
+
// Source overrides (W0077 Phase 6d) — `--sanity-dataset`, `--sanity-project`,
|
|
191
|
+
// and `--sanity-studio-origin` were retired from `ailf run`. Cascade is now:
|
|
192
|
+
// env var > .ailf/config.yaml `source.*` > built-in default (in sources.ts).
|
|
193
|
+
const datasetOverride = process.env.SANITY_DATASET ?? repoConfig?.source?.dataset;
|
|
194
|
+
const projectIdOverride = process.env.SANITY_PROJECT_ID ?? repoConfig?.source?.projectId;
|
|
195
|
+
const studioOriginOverride = process.env.SANITY_STUDIO_ORIGIN ?? repoConfig?.source?.studioOrigin;
|
|
196
|
+
// Report store overrides (W0077 Phase 6e — `--report-dataset` and
|
|
197
|
+
// `--report-project` retired). Resolution order:
|
|
198
|
+
// 1. Environment variables (AILF_REPORT_DATASET, AILF_REPORT_PROJECT_ID)
|
|
199
|
+
// 2. .ailf/config.yaml reportStore block
|
|
200
|
+
// 3. Eval dataset override (so perspective evals publish to the same dataset)
|
|
201
|
+
const reportDataset = process.env.AILF_REPORT_DATASET ??
|
|
202
|
+
repoConfig?.reportStore?.dataset ??
|
|
203
|
+
datasetOverride ??
|
|
204
|
+
undefined;
|
|
205
|
+
const reportProjectId = process.env.AILF_REPORT_PROJECT_ID ??
|
|
206
|
+
repoConfig?.reportStore?.projectId ??
|
|
207
|
+
undefined;
|
|
208
|
+
// Publish polarity (W0077 Phase 4) — auto policy lives in
|
|
209
|
+
// .ailf/config.yaml's `publish.auto` (or env / default). CLI flags and
|
|
210
|
+
// AILF_PUBLISH still override the policy.
|
|
177
211
|
const reportStoreToken = process.env.AILF_REPORT_SANITY_API_TOKEN ?? process.env.SANITY_API_TOKEN;
|
|
178
212
|
const reportStoreConfigured = Boolean(reportStoreToken);
|
|
179
|
-
// Track whether the user explicitly chose --publish or --no-publish.
|
|
180
|
-
// In remote mode, when this is false we omit the field from the API
|
|
181
|
-
// request so the server can apply its own default (publish when jobId
|
|
182
|
-
// is present). Without this, the local smart-default (which checks for
|
|
183
|
-
// a local Sanity token the CLI doesn't have) would send publish:false
|
|
184
|
-
// and suppress server-side report publishing.
|
|
185
213
|
const publishExplicit = opts.publish !== undefined || process.env.AILF_PUBLISH !== undefined;
|
|
214
|
+
const publishAuto = resolvePublishAuto(repoConfig?.publish?.auto);
|
|
186
215
|
let publishEnabled;
|
|
187
216
|
if (opts.publish !== undefined) {
|
|
188
|
-
// Explicit --publish or --no-publish always wins
|
|
189
217
|
publishEnabled = opts.publish;
|
|
190
218
|
}
|
|
191
219
|
else if (process.env.AILF_PUBLISH === "1") {
|
|
@@ -195,36 +223,40 @@ export function computeResolvedOptions(opts) {
|
|
|
195
223
|
publishEnabled = false;
|
|
196
224
|
}
|
|
197
225
|
else {
|
|
198
|
-
//
|
|
199
|
-
|
|
226
|
+
// Apply the auto policy. The report store still has to be configured
|
|
227
|
+
// for `auto: "always"` and `"full-runs"` — without a token, publishing
|
|
228
|
+
// is impossible regardless of policy.
|
|
229
|
+
switch (publishAuto) {
|
|
230
|
+
case "always":
|
|
231
|
+
publishEnabled = reportStoreConfigured;
|
|
232
|
+
break;
|
|
233
|
+
case "never":
|
|
234
|
+
publishEnabled = false;
|
|
235
|
+
break;
|
|
236
|
+
case "full-runs":
|
|
237
|
+
default:
|
|
238
|
+
publishEnabled = reportStoreConfigured && !debugEnabled;
|
|
239
|
+
break;
|
|
240
|
+
}
|
|
200
241
|
}
|
|
201
|
-
//
|
|
202
|
-
|
|
203
|
-
//
|
|
204
|
-
//
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
// 3. .ailf/config.yaml reportStore block (when repo tasks path is set)
|
|
211
|
-
// 4. Eval dataset override (so perspective evals publish to the same dataset)
|
|
212
|
-
const repoConfig = loadRepoConfigIfPresent(resolvedRepoTasksPath);
|
|
213
|
-
const reportDataset = opts.reportDataset ??
|
|
214
|
-
process.env.AILF_REPORT_DATASET ??
|
|
215
|
-
repoConfig?.reportStore?.dataset ??
|
|
216
|
-
datasetOverride ??
|
|
217
|
-
undefined;
|
|
218
|
-
const reportProjectId = opts.reportProject ??
|
|
219
|
-
process.env.AILF_REPORT_PROJECT_ID ??
|
|
220
|
-
repoConfig?.reportStore?.projectId ??
|
|
221
|
-
undefined;
|
|
242
|
+
// Tag default cascade: --publish-tag > AILF_PUBLISH_TAG > .ailf/config.yaml
|
|
243
|
+
const publishTag = opts.publishTag ?? process.env.AILF_PUBLISH_TAG ?? repoConfig?.publish?.tag;
|
|
244
|
+
// Execution-tier resolution (W0077 Phase 6b) — concurrency, grader
|
|
245
|
+
// replications, gap analysis, and api URL all moved from CLI flags to
|
|
246
|
+
// `.ailf/config.yaml`'s `execution` block. Cascade for each:
|
|
247
|
+
// env var (where one exists) > .ailf/config.yaml > built-in default
|
|
248
|
+
const concurrency = repoConfig?.execution?.concurrency;
|
|
249
|
+
const graderReplications = repoConfig?.execution?.graderReplications;
|
|
250
|
+
const gapAnalysisEnabled = repoConfig?.execution?.gapAnalysis ?? true;
|
|
222
251
|
// Remote mode
|
|
223
252
|
const remote = opts.remote || process.env.AILF_REMOTE === "1";
|
|
224
|
-
const apiUrl =
|
|
253
|
+
const apiUrl = process.env.AILF_API_URL ??
|
|
254
|
+
repoConfig?.execution?.apiUrl ??
|
|
255
|
+
"https://ailf-api.sanity.build";
|
|
225
256
|
const apiKey = process.env.AILF_API_KEY ?? undefined;
|
|
226
|
-
// Output directory
|
|
227
|
-
|
|
257
|
+
// Output directory (W0077 Phase 6c) — `output.dir` from .ailf/config.yaml
|
|
258
|
+
// when set, otherwise <cwd>/.ailf/results/latest/.
|
|
259
|
+
const outputDir = resolveOutputDir(repoConfig?.output?.dir);
|
|
228
260
|
return {
|
|
229
261
|
allowedOriginArgs,
|
|
230
262
|
apiKey,
|
|
@@ -232,16 +264,15 @@ export function computeResolvedOptions(opts) {
|
|
|
232
264
|
areaOption,
|
|
233
265
|
beforeOption,
|
|
234
266
|
changedDocsOption,
|
|
235
|
-
compareBaseline
|
|
267
|
+
compareBaseline,
|
|
236
268
|
compareEnabled,
|
|
237
269
|
compareThreshold: opts.threshold,
|
|
238
|
-
concurrency
|
|
270
|
+
concurrency,
|
|
239
271
|
datasetOverride,
|
|
240
272
|
debug,
|
|
241
|
-
discoveryReportEnabled: opts.discoveryReport,
|
|
242
273
|
dryRun: opts.dryRun,
|
|
243
|
-
gapAnalysisEnabled
|
|
244
|
-
graderReplications
|
|
274
|
+
gapAnalysisEnabled,
|
|
275
|
+
graderReplications,
|
|
245
276
|
headerArgs,
|
|
246
277
|
impactSummary,
|
|
247
278
|
mode,
|
|
@@ -256,15 +287,14 @@ export function computeResolvedOptions(opts) {
|
|
|
256
287
|
promptfooUrl: opts.promptfooUrl,
|
|
257
288
|
publishEnabled,
|
|
258
289
|
publishExplicit,
|
|
259
|
-
publishTag
|
|
260
|
-
readinessEnabled: opts.readiness,
|
|
290
|
+
publishTag,
|
|
261
291
|
remote,
|
|
262
292
|
reportDataset,
|
|
263
293
|
reportProjectId,
|
|
264
294
|
sanityDocumentArgs,
|
|
265
295
|
searchMode,
|
|
266
|
-
skipEval: opts.
|
|
267
|
-
skipFetch: opts.
|
|
296
|
+
skipEval: opts.eval === false,
|
|
297
|
+
skipFetch: opts.fetch === false,
|
|
268
298
|
source: opts.source,
|
|
269
299
|
studioOriginOverride,
|
|
270
300
|
repoTasksPath: resolvedRepoTasksPath,
|
|
@@ -272,37 +302,55 @@ export function computeResolvedOptions(opts) {
|
|
|
272
302
|
tagOption,
|
|
273
303
|
taskSourceType: resolvedTaskSourceType,
|
|
274
304
|
urlArgs,
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
305
|
+
// Artifact-writer settings (W0077 Phase 6g) — `--no-artifacts`,
|
|
306
|
+
// `--artifacts-dir`, and `--artifacts-exclude` retired. Cascade:
|
|
307
|
+
// AILF_ARTIFACTS_DIR > .ailf/config.yaml `artifacts.dir` > default
|
|
308
|
+
// .ailf/config.yaml `artifacts.enabled: false` > writers attached
|
|
309
|
+
// .ailf/config.yaml `artifacts.exclude` > no exclusions
|
|
310
|
+
// `--no-artifacts-write` (artifactsDryRun) stays per-run.
|
|
311
|
+
artifactsDisabled: repoConfig?.artifacts?.enabled === false,
|
|
312
|
+
artifactsDir: process.env.AILF_ARTIFACTS_DIR ?? repoConfig?.artifacts?.dir,
|
|
313
|
+
artifactsDryRun: opts.artifactsWrite === false,
|
|
314
|
+
artifactsExclude: repoConfig?.artifacts?.exclude,
|
|
279
315
|
classificationOption: opts.classification?.trim() || undefined,
|
|
280
|
-
|
|
281
|
-
|
|
316
|
+
// Owner attribution (W0077 Phase 6f) — `--owner-team` and
|
|
317
|
+
// `--owner-individual` retired. Cascade: AILF_OWNER_TEAM /
|
|
318
|
+
// AILF_OWNER_INDIVIDUAL env vars > .ailf/config.yaml `owner.*` > undefined.
|
|
319
|
+
// Downstream resolution in build-request.ts already honors the env var as a
|
|
320
|
+
// fallback when this option is unset, but threading it through here keeps
|
|
321
|
+
// the cascade order explicit.
|
|
322
|
+
ownerTeamOption: process.env.AILF_OWNER_TEAM?.trim() ||
|
|
323
|
+
repoConfig?.owner?.team ||
|
|
324
|
+
undefined,
|
|
325
|
+
ownerIndividualOption: process.env.AILF_OWNER_INDIVIDUAL?.trim() ||
|
|
326
|
+
repoConfig?.owner?.individual ||
|
|
327
|
+
undefined,
|
|
282
328
|
purposeOption: opts.purpose?.trim() || undefined,
|
|
283
329
|
labelOptions: opts.label ?? [],
|
|
284
330
|
};
|
|
285
331
|
}
|
|
332
|
+
const PUBLISH_AUTO_VALUES = ["always", "full-runs", "never"];
|
|
286
333
|
/**
|
|
287
|
-
* Resolve the
|
|
288
|
-
*
|
|
289
|
-
*
|
|
290
|
-
*
|
|
334
|
+
* Resolve the publish auto policy. Precedence:
|
|
335
|
+
* 1. .ailf/config.yaml `publish.auto`
|
|
336
|
+
* 2. AILF_PUBLISH_AUTO env var
|
|
337
|
+
* 3. Default: "full-runs" (preserves the historical smart default)
|
|
291
338
|
*
|
|
292
|
-
*
|
|
293
|
-
*
|
|
339
|
+
* Unrecognized env-var values fall through to the default with a warning;
|
|
340
|
+
* the schema validates the config-file value at parse time.
|
|
294
341
|
*/
|
|
295
|
-
function
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
if (
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
.
|
|
305
|
-
|
|
342
|
+
function resolvePublishAuto(repoValue) {
|
|
343
|
+
if (repoValue)
|
|
344
|
+
return repoValue;
|
|
345
|
+
const envValue = process.env.AILF_PUBLISH_AUTO?.trim();
|
|
346
|
+
if (envValue &&
|
|
347
|
+
PUBLISH_AUTO_VALUES.includes(envValue)) {
|
|
348
|
+
return envValue;
|
|
349
|
+
}
|
|
350
|
+
if (envValue) {
|
|
351
|
+
console.warn(`⚠️ AILF_PUBLISH_AUTO="${envValue}" is not recognized; valid values are ${PUBLISH_AUTO_VALUES.join(", ")}. Falling back to "full-runs".`);
|
|
352
|
+
}
|
|
353
|
+
return "full-runs";
|
|
306
354
|
}
|
|
307
355
|
/** Resolve and validate the --task-source flag value. */
|
|
308
356
|
function resolveTaskSourceType(raw) {
|
|
@@ -370,18 +418,11 @@ export async function executePipeline(cliOpts) {
|
|
|
370
418
|
const callerCwd = getCallerCwd();
|
|
371
419
|
const adapter = new FileConfigAdapter(cliOpts.config, ROOT);
|
|
372
420
|
const config = await adapter.resolve();
|
|
373
|
-
//
|
|
374
|
-
//
|
|
375
|
-
//
|
|
376
|
-
//
|
|
377
|
-
if (
|
|
378
|
-
config.repoTasksPath = resolve(callerCwd, cliOpts.repoTasksPath);
|
|
379
|
-
}
|
|
380
|
-
else if (config.taskSourceType === "repo" && !config.repoTasksPath) {
|
|
381
|
-
// Default: when taskSource=repo but no path set, look in .ailf/tasks/
|
|
382
|
-
// (matches the `ailf init` scaffold location). Silent fallback here —
|
|
383
|
-
// composition root will surface a helpful error if the directory is
|
|
384
|
-
// missing.
|
|
421
|
+
// When `taskSource.type` is `repo` and no `repoTasksPath` was set in
|
|
422
|
+
// the config file, fall back to `<callerCwd>/.ailf/tasks/` (the
|
|
423
|
+
// location `ailf init` scaffolds). Silent fallback — composition-root
|
|
424
|
+
// surfaces a helpful error if the directory is missing.
|
|
425
|
+
if (config.taskSourceType === "repo" && !config.repoTasksPath) {
|
|
385
426
|
const defaultPath = resolve(callerCwd, ".ailf", "tasks");
|
|
386
427
|
if (existsSync(defaultPath)) {
|
|
387
428
|
config.repoTasksPath = defaultPath;
|
|
@@ -390,18 +431,13 @@ export async function executePipeline(cliOpts) {
|
|
|
390
431
|
if (cliOpts.output) {
|
|
391
432
|
config.outputPath = resolve(callerCwd, cliOpts.output);
|
|
392
433
|
}
|
|
393
|
-
//
|
|
394
|
-
|
|
395
|
-
//
|
|
396
|
-
//
|
|
397
|
-
|
|
398
|
-
config.
|
|
399
|
-
config.
|
|
400
|
-
config.artifactsDryRun ??= cliOpts.artifactsDryRun;
|
|
401
|
-
const excludeList = parseArtifactsExcludeList(cliOpts.artifactsExclude);
|
|
402
|
-
if (excludeList) {
|
|
403
|
-
config.artifactsExclude = excludeList;
|
|
404
|
-
}
|
|
434
|
+
// Artifact-writer env-var fallbacks. The adapter populates the bulk of
|
|
435
|
+
// the artifact settings from `EvalConfigSchema.artifacts.*` (W0077
|
|
436
|
+
// Phase 6g); we layer the env-var fallbacks here for fields the schema
|
|
437
|
+
// doesn't cover (GCS bucket, upload mode), and the AILF_ARTIFACTS_DIR
|
|
438
|
+
// override that wins over both schema and CLI.
|
|
439
|
+
config.artifactsDir = process.env.AILF_ARTIFACTS_DIR ?? config.artifactsDir;
|
|
440
|
+
config.artifactsDryRun ??= cliOpts.artifactsWrite === false;
|
|
405
441
|
config.artifactGcsBucket ??= process.env.AILF_GCS_ARTIFACT_BUCKET;
|
|
406
442
|
config.artifactUpload ??= parseArtifactUploadEnv(process.env.AILF_ARTIFACT_UPLOAD);
|
|
407
443
|
// Create AppContext directly from the merged config so adapters
|
|
@@ -470,18 +506,16 @@ function writePipelineResult(result, outputDir) {
|
|
|
470
506
|
console.log(` 📄 Pipeline result: ${resultFile}\n`);
|
|
471
507
|
}
|
|
472
508
|
/**
|
|
473
|
-
* Load
|
|
474
|
-
*
|
|
509
|
+
* Load `<cwd>/.ailf/config.yaml` if it exists. Returns null when the file
|
|
510
|
+
* is absent or unparseable.
|
|
475
511
|
*
|
|
476
|
-
*
|
|
477
|
-
*
|
|
478
|
-
*
|
|
512
|
+
* Auto-loads regardless of `--task-source`: the same `.ailf/config.yaml` is
|
|
513
|
+
* the per-environment configuration home for every run (W0077 Phase 6a).
|
|
514
|
+
* Subsequent flag-family migrations (6b–6h) read additional fields from
|
|
515
|
+
* this same file via the same loader.
|
|
479
516
|
*/
|
|
480
|
-
function loadRepoConfigIfPresent(
|
|
481
|
-
|
|
482
|
-
return null;
|
|
483
|
-
// .ailf/tasks/ → .ailf/config.yaml
|
|
484
|
-
const configPath = resolve(repoTasksPath, "..", "config.yaml");
|
|
517
|
+
function loadRepoConfigIfPresent(cwd) {
|
|
518
|
+
const configPath = resolve(cwd, ".ailf", "config.yaml");
|
|
485
519
|
if (!existsSync(configPath))
|
|
486
520
|
return null;
|
|
487
521
|
try {
|
|
@@ -16,7 +16,7 @@ const ROOT = resolve(__dirname, "..", "..");
|
|
|
16
16
|
export function createPrCommentCommand() {
|
|
17
17
|
const cmd = new Command("pr-comment")
|
|
18
18
|
.description("Generate a markdown PR comment from evaluation scores")
|
|
19
|
-
.option("--output <path>", "Write comment to file (default: stdout)")
|
|
19
|
+
.option("-o, --output <path>", "Write comment to file (default: stdout)")
|
|
20
20
|
.option("--promptfoo-url <url>", "Promptfoo share URL to include")
|
|
21
21
|
.action(async (opts) => {
|
|
22
22
|
try {
|
|
@@ -29,8 +29,6 @@ export function createPrCommentCommand() {
|
|
|
29
29
|
skipEval: true,
|
|
30
30
|
compareEnabled: false,
|
|
31
31
|
gapAnalysisEnabled: false,
|
|
32
|
-
readinessEnabled: false,
|
|
33
|
-
discoveryReportEnabled: false,
|
|
34
32
|
publishEnabled: false,
|
|
35
33
|
noCache: true,
|
|
36
34
|
noRemoteCache: true,
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
*
|
|
5
5
|
* Reads a score-summary.json (defaulting to results/latest/score-summary.json),
|
|
6
6
|
* builds provenance, writes the report to Sanity, and fans out to configured
|
|
7
|
-
* sinks — exactly the same as the publish step in `ailf
|
|
7
|
+
* sinks — exactly the same as the publish step in `ailf run`, but
|
|
8
8
|
* standalone.
|
|
9
9
|
*
|
|
10
10
|
* Uses createAppContext() (composition root) for all infrastructure access.
|
package/dist/commands/publish.js
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
*
|
|
5
5
|
* Reads a score-summary.json (defaulting to results/latest/score-summary.json),
|
|
6
6
|
* builds provenance, writes the report to Sanity, and fans out to configured
|
|
7
|
-
* sinks — exactly the same as the publish step in `ailf
|
|
7
|
+
* sinks — exactly the same as the publish step in `ailf run`, but
|
|
8
8
|
* standalone.
|
|
9
9
|
*
|
|
10
10
|
* Uses createAppContext() (composition root) for all infrastructure access.
|
|
@@ -88,7 +88,6 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
|
|
|
88
88
|
// Wire up infrastructure via composition root
|
|
89
89
|
const ctx = createAppContext({
|
|
90
90
|
compareEnabled: false,
|
|
91
|
-
discoveryReportEnabled: false,
|
|
92
91
|
gapAnalysisEnabled: false,
|
|
93
92
|
mode: "literacy",
|
|
94
93
|
noAutoScope: false,
|
|
@@ -97,7 +96,6 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
|
|
|
97
96
|
outputDir,
|
|
98
97
|
publishEnabled: true,
|
|
99
98
|
publishTag: opts.tag,
|
|
100
|
-
readinessEnabled: false,
|
|
101
99
|
rootDir: ROOT,
|
|
102
100
|
searchMode: "open",
|
|
103
101
|
skipEval: true,
|
|
@@ -117,7 +115,7 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
|
|
|
117
115
|
if (!existsSync(resolvedPath)) {
|
|
118
116
|
console.error(` ✖ File not found: ${resolvedPath}`);
|
|
119
117
|
console.error();
|
|
120
|
-
console.error(" Hint: Run `ailf
|
|
118
|
+
console.error(" Hint: Run `ailf run` first to generate results,");
|
|
121
119
|
console.error(" or provide a path to an existing score-summary.json.");
|
|
122
120
|
process.exit(1);
|
|
123
121
|
}
|
|
@@ -15,23 +15,32 @@ import { formatReadinessMarkdown, generateReadinessReport, } from "../pipeline/r
|
|
|
15
15
|
import { ThresholdConfigSchema, } from "../pipeline/schemas.js";
|
|
16
16
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
17
17
|
const ROOT = resolve(__dirname, "..", "..");
|
|
18
|
-
const
|
|
19
|
-
const GAP_ANALYSIS_PATH = join(ROOT, "results", "latest", "gap-analysis.json");
|
|
18
|
+
const DEFAULT_RESULTS_DIR = join(ROOT, "results", "latest");
|
|
20
19
|
// thresholds loaded via loadConfigFile below
|
|
21
20
|
const BASELINES_DIR = join(ROOT, "results", "baselines");
|
|
21
|
+
/** Resolve `--from-run` to an absolute results directory. */
|
|
22
|
+
function resolveFromRun(value) {
|
|
23
|
+
if (value === "latest")
|
|
24
|
+
return DEFAULT_RESULTS_DIR;
|
|
25
|
+
return resolve(value);
|
|
26
|
+
}
|
|
22
27
|
export function createReadinessReportCommand() {
|
|
23
|
-
return new Command("readiness
|
|
28
|
+
return new Command("readiness")
|
|
24
29
|
.description("Generate launch readiness report for a feature area")
|
|
25
30
|
.requiredOption("-a, --area <area>", "Feature area to evaluate (required)")
|
|
31
|
+
.option("--from-run <path>", "Results directory to read from (`latest` or a path to a results directory containing score-summary.json)", "latest")
|
|
26
32
|
.option("-H, --history", "Include historical progress from baselines", false)
|
|
27
33
|
.option("-o, --output <path>", "Write markdown to file instead of stdout")
|
|
28
34
|
.action(async (opts) => {
|
|
35
|
+
const resultsDir = resolveFromRun(opts.fromRun);
|
|
36
|
+
const scoreSummaryPath = join(resultsDir, "score-summary.json");
|
|
37
|
+
const gapAnalysisPath = join(resultsDir, "gap-analysis.json");
|
|
29
38
|
// Load score summary
|
|
30
|
-
if (!existsSync(
|
|
31
|
-
console.error(`❌ Score summary not found at ${
|
|
39
|
+
if (!existsSync(scoreSummaryPath)) {
|
|
40
|
+
console.error(`❌ Score summary not found at ${scoreSummaryPath}. Run \`ailf run\` first or pass --from-run <path>.`);
|
|
32
41
|
process.exit(1);
|
|
33
42
|
}
|
|
34
|
-
const scoreSummary = JSON.parse(readFileSync(
|
|
43
|
+
const scoreSummary = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
|
|
35
44
|
// Load threshold config
|
|
36
45
|
let parsedThresholds;
|
|
37
46
|
try {
|
|
@@ -57,8 +66,8 @@ export function createReadinessReportCommand() {
|
|
|
57
66
|
const thresholdConfig = thresholdResult.data;
|
|
58
67
|
// Load gap analysis (optional)
|
|
59
68
|
let gapAnalysis;
|
|
60
|
-
if (existsSync(
|
|
61
|
-
gapAnalysis = JSON.parse(readFileSync(
|
|
69
|
+
if (existsSync(gapAnalysisPath)) {
|
|
70
|
+
gapAnalysis = JSON.parse(readFileSync(gapAnalysisPath, "utf-8"));
|
|
62
71
|
}
|
|
63
72
|
const history = [];
|
|
64
73
|
if (opts.history && existsSync(BASELINES_DIR)) {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* remote-pipeline.ts — Remote execution flow for `ailf
|
|
2
|
+
* remote-pipeline.ts — Remote execution flow for `ailf run --remote`.
|
|
3
3
|
*
|
|
4
4
|
* Reads local `.ailf/tasks/` YAML, validates locally via Zod schemas,
|
|
5
5
|
* submits to the AILF API, polls for completion, and writes the same
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* remote-pipeline.ts — Remote execution flow for `ailf
|
|
2
|
+
* remote-pipeline.ts — Remote execution flow for `ailf run --remote`.
|
|
3
3
|
*
|
|
4
4
|
* Reads local `.ailf/tasks/` YAML, validates locally via Zod schemas,
|
|
5
5
|
* submits to the AILF API, polls for completion, and writes the same
|
|
@@ -135,8 +135,6 @@ function toConfigSlice(opts) {
|
|
|
135
135
|
perspectiveOverride: opts.perspectiveOverride,
|
|
136
136
|
graderReplications: opts.graderReplications,
|
|
137
137
|
gapAnalysisEnabled: opts.gapAnalysisEnabled,
|
|
138
|
-
readinessEnabled: opts.readinessEnabled,
|
|
139
|
-
discoveryReportEnabled: opts.discoveryReportEnabled,
|
|
140
138
|
noRemoteCache: opts.noRemoteCache,
|
|
141
139
|
// D0037 / W0069 caller envelope overrides — flags override env vars
|
|
142
140
|
// inside buildCallerEnvelope(), which also merges AILF_* defaults.
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* run command — the main evaluation entry point.
|
|
3
|
+
*
|
|
4
|
+
* Renamed from `ailf pipeline` to `ailf run` per W0077 Phase 1
|
|
5
|
+
* (see docs/design-docs/pipeline-command-surface.md §3). The command drives
|
|
6
|
+
* the evaluation pipeline; the orchestrator internals retain the "pipeline"
|
|
7
|
+
* name because they describe a multi-step process, not the CLI verb.
|
|
8
|
+
*
|
|
9
|
+
* Defines all 44+ CLI flags via Commander, resolves them into a typed
|
|
10
|
+
* options object, bridges to process.env for downstream modules, and
|
|
11
|
+
* delegates to runPipeline().
|
|
12
|
+
*
|
|
13
|
+
* @see docs/guides/cli-guide.md for per-flag behavior notes, or
|
|
14
|
+
* docs/references/cli-reference.md for the auto-generated flag matrix.
|
|
15
|
+
*/
|
|
16
|
+
import { Command } from "commander";
|
|
17
|
+
/**
|
|
18
|
+
* Raw CLI options as parsed by Commander.
|
|
19
|
+
* Field names follow Commander's camelCase convention for kebab-case flags.
|
|
20
|
+
*/
|
|
21
|
+
export interface PipelineCliOptions {
|
|
22
|
+
area?: string;
|
|
23
|
+
autoScope: boolean;
|
|
24
|
+
/** `--before-source <name>` — swap the doc source to a "before" state for impact evaluation. */
|
|
25
|
+
beforeSource?: string;
|
|
26
|
+
cache: boolean;
|
|
27
|
+
changedDocs?: string;
|
|
28
|
+
/**
|
|
29
|
+
* `--compare [baseline]` — Commander optional argument.
|
|
30
|
+
* undefined → flag not passed
|
|
31
|
+
* true → bare `--compare` (compare against latest baseline)
|
|
32
|
+
* string → `--compare path/to/baseline.json` (pin to a file)
|
|
33
|
+
*/
|
|
34
|
+
compare?: boolean | string;
|
|
35
|
+
config?: string;
|
|
36
|
+
debug: boolean;
|
|
37
|
+
filterFirstN?: number;
|
|
38
|
+
filterPattern?: string;
|
|
39
|
+
filterSample?: number;
|
|
40
|
+
dryRun: boolean;
|
|
41
|
+
eval: boolean;
|
|
42
|
+
fetch: boolean;
|
|
43
|
+
mode: string;
|
|
44
|
+
variant?: string;
|
|
45
|
+
output?: string;
|
|
46
|
+
promptfooUrl?: string;
|
|
47
|
+
publish?: boolean;
|
|
48
|
+
publishTag?: string;
|
|
49
|
+
remoteCache?: boolean;
|
|
50
|
+
sanityDocument: string[];
|
|
51
|
+
sanityPerspective?: string;
|
|
52
|
+
search?: string;
|
|
53
|
+
source?: string;
|
|
54
|
+
remote: boolean;
|
|
55
|
+
task?: string;
|
|
56
|
+
tag: string[];
|
|
57
|
+
threshold?: number;
|
|
58
|
+
url: string[];
|
|
59
|
+
artifactsWrite: boolean;
|
|
60
|
+
classification?: string;
|
|
61
|
+
purpose?: string;
|
|
62
|
+
label: string[];
|
|
63
|
+
}
|
|
64
|
+
export declare function createRunCommand(): Command;
|