@sanity/ailf 3.7.0 → 3.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
- package/config/thresholds.ts +3 -3
- package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
- package/dist/_vendor/ailf-core/examples/index.js +2 -2
- package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
- package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
- package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
- package/dist/_vendor/ailf-shared/run-classification.js +1 -1
- package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
- package/dist/adapters/api-client/build-request.d.ts +0 -2
- package/dist/adapters/api-client/build-request.js +2 -6
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
- package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
- package/dist/adapters/config-sources/file-config-adapter.js +42 -17
- package/dist/adapters/task-sources/repo-schemas.d.ts +41 -3
- package/dist/adapters/task-sources/repo-schemas.js +127 -0
- package/dist/cli-program.d.ts +39 -0
- package/dist/cli-program.js +137 -0
- package/dist/cli.d.ts +8 -2
- package/dist/cli.js +128 -142
- package/dist/commands/agent-report.js +1 -1
- package/dist/commands/calculate-scores.js +0 -2
- package/dist/commands/check-staleness.js +1 -1
- package/dist/commands/chronic-failures.js +4 -4
- package/dist/commands/coverage-audit.js +6 -7
- package/dist/commands/discovery-report.js +16 -4
- package/dist/commands/eval.d.ts +1 -1
- package/dist/commands/eval.js +1 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +13 -44
- package/dist/commands/fetch-docs.js +0 -2
- package/dist/commands/generate-configs.js +0 -2
- package/dist/commands/grader/index.js +3 -3
- package/dist/commands/init.d.ts +2 -2
- package/dist/commands/init.js +10 -9
- package/dist/commands/interactive.d.ts +1 -1
- package/dist/commands/interactive.js +8 -8
- package/dist/commands/pipeline-action.d.ts +1 -3
- package/dist/commands/pipeline-action.js +174 -140
- package/dist/commands/pr-comment.js +1 -3
- package/dist/commands/publish.d.ts +1 -1
- package/dist/commands/publish.js +2 -4
- package/dist/commands/readiness-report.js +17 -8
- package/dist/commands/remote-pipeline.d.ts +1 -1
- package/dist/commands/remote-pipeline.js +1 -3
- package/dist/commands/run.d.ts +64 -0
- package/dist/commands/{pipeline.js → run.js} +19 -30
- package/dist/commands/shared/help.js +4 -4
- package/dist/commands/shared/options.d.ts +29 -3
- package/dist/commands/shared/options.js +37 -13
- package/dist/commands/validate-tasks.js +1 -1
- package/dist/commands/validate.d.ts +1 -1
- package/dist/commands/validate.js +2 -2
- package/dist/commands/weekly-digest.js +3 -3
- package/dist/config/thresholds.ts +3 -3
- package/dist/orchestration/build-app-context.js +0 -2
- package/dist/orchestration/build-step-sequence.js +1 -11
- package/dist/orchestration/steps/fetch-docs-step.js +1 -1
- package/dist/orchestration/steps/index.d.ts +0 -2
- package/dist/orchestration/steps/index.js +0 -2
- package/dist/orchestration/steps/run-eval-step.js +1 -1
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/map-request-to-config.js +0 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/dist/pipeline/plan.d.ts +2 -4
- package/dist/pipeline/plan.js +4 -32
- package/dist/pipeline/run-context.d.ts +1 -1
- package/dist/pipeline/run-context.js +4 -4
- package/dist/pipeline/validate.d.ts +1 -1
- package/dist/pipeline/validate.js +1 -1
- package/package.json +11 -9
- package/dist/commands/pipeline.d.ts +0 -77
- package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
- package/dist/orchestration/steps/discovery-report-step.js +0 -62
- package/dist/orchestration/steps/readiness-step.d.ts +0 -13
- package/dist/orchestration/steps/readiness-step.js +0 -98
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509
|
@@ -95,16 +95,16 @@ export function createGraderCommand() {
|
|
|
95
95
|
.command("validate")
|
|
96
96
|
.description("Validate grader accuracy against human reference grades")
|
|
97
97
|
.option("-g, --grader <model>", "Grader model to validate")
|
|
98
|
-
.option("-
|
|
98
|
+
.option("--mae-threshold <n>", "MAE threshold for pass/fail", parseFloat, 10)
|
|
99
99
|
.action(async (opts) => {
|
|
100
100
|
try {
|
|
101
101
|
const result = await runGraderValidate({
|
|
102
102
|
graderModel: opts.grader,
|
|
103
|
-
maeThreshold: opts.
|
|
103
|
+
maeThreshold: opts.maeThreshold,
|
|
104
104
|
rootDir: ROOT,
|
|
105
105
|
});
|
|
106
106
|
if (!result.passesThreshold) {
|
|
107
|
-
console.error(`\n ❌ VALIDATION FAILED: MAE ${result.overallMae} exceeds threshold ${opts.
|
|
107
|
+
console.error(`\n ❌ VALIDATION FAILED: MAE ${result.overallMae} exceeds threshold ${opts.maeThreshold}`);
|
|
108
108
|
process.exit(1);
|
|
109
109
|
}
|
|
110
110
|
}
|
package/dist/commands/init.d.ts
CHANGED
|
@@ -11,8 +11,8 @@
|
|
|
11
11
|
*
|
|
12
12
|
* Usage:
|
|
13
13
|
* ailf init # TypeScript output (default)
|
|
14
|
-
* ailf init --
|
|
15
|
-
* ailf init --
|
|
14
|
+
* ailf init --format yaml # YAML output
|
|
15
|
+
* ailf init --format json # JSON output
|
|
16
16
|
* ailf init --force # overwrite existing files
|
|
17
17
|
* ailf init --path ./my-dir # target a specific directory
|
|
18
18
|
*/
|
package/dist/commands/init.js
CHANGED
|
@@ -11,8 +11,8 @@
|
|
|
11
11
|
*
|
|
12
12
|
* Usage:
|
|
13
13
|
* ailf init # TypeScript output (default)
|
|
14
|
-
* ailf init --
|
|
15
|
-
* ailf init --
|
|
14
|
+
* ailf init --format yaml # YAML output
|
|
15
|
+
* ailf init --format json # JSON output
|
|
16
16
|
* ailf init --force # overwrite existing files
|
|
17
17
|
* ailf init --path ./my-dir # target a specific directory
|
|
18
18
|
*/
|
|
@@ -27,7 +27,7 @@ import { probeUserLocalAilf } from "../adapters/config-sources/ailf-resolver.js"
|
|
|
27
27
|
export function createInitCommand() {
|
|
28
28
|
return new Command("init")
|
|
29
29
|
.description("Initialize a directory for AI Literacy Framework evaluation")
|
|
30
|
-
.option("--
|
|
30
|
+
.option("-f, --format <fmt>", 'Output format for generated files: "ts" (default), "yaml", or "json"', "ts")
|
|
31
31
|
.option("--force", "Overwrite existing files", false)
|
|
32
32
|
.option("--path <dir>", "Target directory (default: current directory)", ".")
|
|
33
33
|
.option("--mode <mode>", "Scaffold for a specific mode: literacy, mcp-server, custom (default: all modes)")
|
|
@@ -63,15 +63,15 @@ function taskStemsForMode(mode) {
|
|
|
63
63
|
// ---------------------------------------------------------------------------
|
|
64
64
|
async function runInit(opts) {
|
|
65
65
|
const validFormats = new Set(["ts", "yaml", "json"]);
|
|
66
|
-
if (!validFormats.has(opts.
|
|
67
|
-
console.error(` ✗ Invalid output format "${opts.
|
|
66
|
+
if (!validFormats.has(opts.format)) {
|
|
67
|
+
console.error(` ✗ Invalid output format "${opts.format}". Valid options: ts, yaml, json`);
|
|
68
68
|
process.exitCode = 1;
|
|
69
69
|
return;
|
|
70
70
|
}
|
|
71
|
-
const format = opts.
|
|
71
|
+
const format = opts.format;
|
|
72
72
|
const force = opts.force;
|
|
73
73
|
if (format === "yaml") {
|
|
74
|
-
console.warn(" ⚠ --
|
|
74
|
+
console.warn(" ⚠ --format yaml is deprecated. TypeScript (default) is the\n" +
|
|
75
75
|
" recommended format — it provides full IDE autocomplete via defineTask().\n" +
|
|
76
76
|
" YAML output will be removed in a future release.\n");
|
|
77
77
|
}
|
|
@@ -285,10 +285,11 @@ async function runInit(opts) {
|
|
|
285
285
|
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
|
|
286
286
|
console.log();
|
|
287
287
|
console.log(" 💡 Or test a remote run against your repo tasks:");
|
|
288
|
-
console.log("
|
|
288
|
+
console.log(" # First, set `taskSource: { type: repo }` in .ailf/config.yaml");
|
|
289
|
+
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest run --remote --debug");
|
|
289
290
|
console.log();
|
|
290
291
|
console.log(" 💡 Or run locally against your repo tasks:");
|
|
291
|
-
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest
|
|
292
|
+
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest run --mode=literacy --variant=full --debug --explain -y");
|
|
292
293
|
console.log();
|
|
293
294
|
}
|
|
294
295
|
// ---------------------------------------------------------------------------
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* When `ailf` is run with no arguments (or `ailf interactive`), this module
|
|
5
5
|
* prompts the user through mode selection, area scoping, debug options,
|
|
6
|
-
* and common flags — then builds and executes the equivalent `ailf
|
|
6
|
+
* and common flags — then builds and executes the equivalent `ailf run`
|
|
7
7
|
* command.
|
|
8
8
|
*
|
|
9
9
|
* Uses @inquirer/prompts for a clean, modern terminal UI.
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
*
|
|
4
4
|
* When `ailf` is run with no arguments (or `ailf interactive`), this module
|
|
5
5
|
* prompts the user through mode selection, area scoping, debug options,
|
|
6
|
-
* and common flags — then builds and executes the equivalent `ailf
|
|
6
|
+
* and common flags — then builds and executes the equivalent `ailf run`
|
|
7
7
|
* command.
|
|
8
8
|
*
|
|
9
9
|
* Uses @inquirer/prompts for a clean, modern terminal UI.
|
|
@@ -52,9 +52,9 @@ async function runInteractiveWizard() {
|
|
|
52
52
|
const workflow = await select({
|
|
53
53
|
choices: [
|
|
54
54
|
{
|
|
55
|
-
description: "Full evaluation
|
|
56
|
-
name: "Run
|
|
57
|
-
value: "
|
|
55
|
+
description: "Full evaluation run (fetch → eval → score → report)",
|
|
56
|
+
name: "Run evaluation",
|
|
57
|
+
value: "run",
|
|
58
58
|
},
|
|
59
59
|
{
|
|
60
60
|
description: "Compare current scores against a saved baseline",
|
|
@@ -193,21 +193,21 @@ async function runInteractiveWizard() {
|
|
|
193
193
|
});
|
|
194
194
|
if (debugStyle === "first-n") {
|
|
195
195
|
const n = await input({ default: "5", message: "Number of tests:" });
|
|
196
|
-
args.push("--
|
|
196
|
+
args.push("--filter-first-n", n);
|
|
197
197
|
}
|
|
198
198
|
else if (debugStyle === "sample") {
|
|
199
199
|
const n = await input({
|
|
200
200
|
default: "3",
|
|
201
201
|
message: "Sample size:",
|
|
202
202
|
});
|
|
203
|
-
args.push("--
|
|
203
|
+
args.push("--filter-sample", n);
|
|
204
204
|
}
|
|
205
205
|
else if (debugStyle === "pattern") {
|
|
206
206
|
const pattern = await input({
|
|
207
207
|
message: "Description regex (e.g. Blog, webhook):",
|
|
208
208
|
});
|
|
209
209
|
if (pattern.trim()) {
|
|
210
|
-
args.push("--
|
|
210
|
+
args.push("--filter-pattern", pattern.trim());
|
|
211
211
|
}
|
|
212
212
|
}
|
|
213
213
|
}
|
|
@@ -238,5 +238,5 @@ async function runInteractiveWizard() {
|
|
|
238
238
|
args.push("--explain", "--yes");
|
|
239
239
|
}
|
|
240
240
|
}
|
|
241
|
-
return { args, command: "
|
|
241
|
+
return { args, command: "run" };
|
|
242
242
|
}
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
*/
|
|
13
13
|
import { type ImpactSummary } from "../pipeline/reverse-mapping.js";
|
|
14
14
|
import type { DebugOptions, EvalMode } from "../pipeline/types.js";
|
|
15
|
-
import type { PipelineCliOptions } from "./
|
|
15
|
+
import type { PipelineCliOptions } from "./run.js";
|
|
16
16
|
export interface ResolvedOptions {
|
|
17
17
|
allowedOriginArgs: string[];
|
|
18
18
|
areaOption?: string;
|
|
@@ -24,7 +24,6 @@ export interface ResolvedOptions {
|
|
|
24
24
|
concurrency?: number;
|
|
25
25
|
datasetOverride?: string;
|
|
26
26
|
debug?: DebugOptions;
|
|
27
|
-
discoveryReportEnabled: boolean;
|
|
28
27
|
dryRun: boolean;
|
|
29
28
|
gapAnalysisEnabled: boolean;
|
|
30
29
|
graderReplications?: number;
|
|
@@ -46,7 +45,6 @@ export interface ResolvedOptions {
|
|
|
46
45
|
/** True when --publish or --no-publish was explicitly passed by the user. */
|
|
47
46
|
publishExplicit: boolean;
|
|
48
47
|
publishTag?: string;
|
|
49
|
-
readinessEnabled: boolean;
|
|
50
48
|
reportDataset?: string;
|
|
51
49
|
reportProjectId?: string;
|
|
52
50
|
sanityDocumentArgs: string[];
|
|
@@ -37,6 +37,10 @@ const VALID_SEARCH_MODES = ["open", "origin-only", "off"];
|
|
|
37
37
|
export function computeResolvedOptions(opts) {
|
|
38
38
|
// Resolve paths relative to the caller's cwd, not the eval package root
|
|
39
39
|
const callerCwd = getCallerCwd();
|
|
40
|
+
// `.ailf/config.yaml` is the per-environment config home for `ailf run`
|
|
41
|
+
// (W0077 Phase 6a). Load early so downstream cascades (source, agentic,
|
|
42
|
+
// owner, output, etc.) can read from it.
|
|
43
|
+
const repoConfig = loadRepoConfigIfPresent(callerCwd);
|
|
40
44
|
// Validate + normalize mode via the single boundary function.
|
|
41
45
|
// normalizeMode() maps legacy variant names (baseline, agentic, etc.)
|
|
42
46
|
// to canonical mode "literacy" + variant, and throws on invalid input.
|
|
@@ -59,34 +63,34 @@ export function computeResolvedOptions(opts) {
|
|
|
59
63
|
console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);
|
|
60
64
|
process.exit(1);
|
|
61
65
|
}
|
|
62
|
-
// Debug options — any
|
|
63
|
-
// implies --debug, so users don't
|
|
64
|
-
// When DEBUG_EVAL is explicitly "0", ignore the
|
|
65
|
-
// CLI flags
|
|
66
|
+
// Debug + filter options — any filter flag (--filter-first-n,
|
|
67
|
+
// --filter-pattern, --filter-sample) implies --debug, so users don't
|
|
68
|
+
// need to pass both. When DEBUG_EVAL is explicitly "0", ignore the env
|
|
69
|
+
// vars. CLI flags always win over env vars.
|
|
66
70
|
const debugEnvDisabled = process.env.DEBUG_EVAL === "0";
|
|
67
|
-
const
|
|
71
|
+
const filterFirstN = opts.filterFirstN ??
|
|
68
72
|
(process.env.DEBUG_EVAL_N && !debugEnvDisabled
|
|
69
73
|
? parseInt(process.env.DEBUG_EVAL_N, 10)
|
|
70
74
|
: undefined);
|
|
71
|
-
const
|
|
75
|
+
const filterPattern = opts.filterPattern ??
|
|
72
76
|
(process.env.DEBUG_EVAL_PATTERN && !debugEnvDisabled
|
|
73
77
|
? process.env.DEBUG_EVAL_PATTERN
|
|
74
78
|
: undefined);
|
|
75
|
-
const
|
|
79
|
+
const filterSample = opts.filterSample ??
|
|
76
80
|
(process.env.DEBUG_EVAL_SAMPLE && !debugEnvDisabled
|
|
77
81
|
? parseInt(process.env.DEBUG_EVAL_SAMPLE, 10)
|
|
78
82
|
: undefined);
|
|
79
83
|
const debugEnabled = opts.debug ||
|
|
80
84
|
process.env.DEBUG_EVAL === "1" ||
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
85
|
+
filterFirstN !== undefined ||
|
|
86
|
+
filterPattern !== undefined ||
|
|
87
|
+
filterSample !== undefined;
|
|
84
88
|
const debug = debugEnabled
|
|
85
89
|
? {
|
|
86
90
|
enabled: true,
|
|
87
|
-
firstN:
|
|
88
|
-
pattern:
|
|
89
|
-
sample:
|
|
91
|
+
firstN: filterFirstN,
|
|
92
|
+
pattern: filterPattern,
|
|
93
|
+
sample: filterSample,
|
|
90
94
|
}
|
|
91
95
|
: undefined;
|
|
92
96
|
// Search mode validation
|
|
@@ -95,16 +99,29 @@ export function computeResolvedOptions(opts) {
|
|
|
95
99
|
console.error(`❌ Invalid --search mode "${searchMode}". Must be one of: ${VALID_SEARCH_MODES.join(", ")}`);
|
|
96
100
|
process.exit(1);
|
|
97
101
|
}
|
|
98
|
-
// Merge repeatable args (singular + plural aliases)
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
//
|
|
104
|
-
|
|
105
|
-
const
|
|
102
|
+
// Merge repeatable args (singular + plural aliases). `headerArgs` and
|
|
103
|
+
// `allowedOriginArgs` are populated from `.ailf/config.yaml`'s `agentic`
|
|
104
|
+
// block (W0077 Phase 6f); the CLI flags `--header` and `--allowed-origin`
|
|
105
|
+
// were retired. The URL-classification block below may still append a
|
|
106
|
+
// host to `allowedOriginArgs` when neither config nor CLI provided one.
|
|
107
|
+
// The `DOC_HEADERS` and `DOC_ALLOWED_ORIGIN(S)` env vars still merge in
|
|
108
|
+
// sources.ts at the doc-fetch boundary (additive, unchanged).
|
|
109
|
+
const urlArgs = opts.url;
|
|
110
|
+
const headerArgs = [];
|
|
111
|
+
const allowedOriginArgs = [];
|
|
112
|
+
const sanityDocumentArgs = opts.sanityDocument;
|
|
113
|
+
if (repoConfig?.agentic?.headers) {
|
|
114
|
+
for (const [key, value] of Object.entries(repoConfig.agentic.headers)) {
|
|
115
|
+
headerArgs.push(`${key}: ${value}`);
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
if (repoConfig?.agentic?.allowedOrigins) {
|
|
119
|
+
allowedOriginArgs.push(...repoConfig.agentic.allowedOrigins);
|
|
120
|
+
}
|
|
121
|
+
// Source overrides — perspective stays per-run (CLI flag), the dataset /
|
|
122
|
+
// project / studio-origin trio moved into `.ailf/config.yaml`'s `source`
|
|
123
|
+
// block in W0077 Phase 6d. Env vars still win over the config-file value.
|
|
106
124
|
const perspectiveOverride = opts.sanityPerspective;
|
|
107
|
-
const studioOriginOverride = opts.sanityStudioOrigin;
|
|
108
125
|
// URL classification (pure computation — results captured, not applied to env)
|
|
109
126
|
if (urlArgs.length > 0) {
|
|
110
127
|
const classification = classifyUrls(urlArgs);
|
|
@@ -115,21 +132,6 @@ export function computeResolvedOptions(opts) {
|
|
|
115
132
|
sanityDocumentArgs.push(...merged);
|
|
116
133
|
}
|
|
117
134
|
}
|
|
118
|
-
// Validate custom headers (early error)
|
|
119
|
-
if (headerArgs.length > 0) {
|
|
120
|
-
for (const h of headerArgs) {
|
|
121
|
-
const colonIdx = h.indexOf(":");
|
|
122
|
-
if (colonIdx === -1) {
|
|
123
|
-
console.error(`❌ Invalid header format: "${h}". Expected "Key: Value".`);
|
|
124
|
-
process.exit(1);
|
|
125
|
-
}
|
|
126
|
-
const key = h.slice(0, colonIdx).trim();
|
|
127
|
-
if (!key) {
|
|
128
|
-
console.error(`❌ Invalid header: empty key in "${h}"`);
|
|
129
|
-
process.exit(1);
|
|
130
|
-
}
|
|
131
|
-
}
|
|
132
|
-
}
|
|
133
135
|
// Auto-infer allowed origin from --url
|
|
134
136
|
if (urlArgs.length > 0 && allowedOriginArgs.length === 0) {
|
|
135
137
|
try {
|
|
@@ -170,22 +172,48 @@ export function computeResolvedOptions(opts) {
|
|
|
170
172
|
}
|
|
171
173
|
}
|
|
172
174
|
}
|
|
173
|
-
// Comparison: --before auto-enables --compare
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
//
|
|
175
|
+
// Comparison: --before-source auto-enables --compare. The `--compare` flag
|
|
176
|
+
// is a Commander optional-argument: undefined when not passed, `true` for
|
|
177
|
+
// the bare flag (compare against latest), and a string path when the user
|
|
178
|
+
// pinned a specific baseline (`--compare path/to/baseline.json`).
|
|
179
|
+
const beforeOption = opts.beforeSource;
|
|
180
|
+
const compareEnabled = (opts.compare !== undefined && opts.compare !== false) ||
|
|
181
|
+
beforeOption !== undefined;
|
|
182
|
+
const compareBaseline = typeof opts.compare === "string" ? opts.compare : undefined;
|
|
183
|
+
// Task-source resolution (W0077 Phase 6h) — `--task-source` and
|
|
184
|
+
// `--repo-tasks-path` retired. Both move under `taskSource: {...}` in
|
|
185
|
+
// `.ailf/config.yaml`. Cascade: config → built-in default (content-lake).
|
|
186
|
+
// When type is `repo` and no path is set, fall back to `<cwd>/.ailf/tasks/`
|
|
187
|
+
// (the location `ailf init` scaffolds).
|
|
188
|
+
const resolvedTaskSourceType = resolveTaskSourceType(repoConfig?.taskSource?.type);
|
|
189
|
+
const resolvedRepoTasksPath = resolveRepoTasksPath(callerCwd, repoConfig?.taskSource?.repoTasksPath, resolvedTaskSourceType);
|
|
190
|
+
// Source overrides (W0077 Phase 6d) — `--sanity-dataset`, `--sanity-project`,
|
|
191
|
+
// and `--sanity-studio-origin` were retired from `ailf run`. Cascade is now:
|
|
192
|
+
// env var > .ailf/config.yaml `source.*` > built-in default (in sources.ts).
|
|
193
|
+
const datasetOverride = process.env.SANITY_DATASET ?? repoConfig?.source?.dataset;
|
|
194
|
+
const projectIdOverride = process.env.SANITY_PROJECT_ID ?? repoConfig?.source?.projectId;
|
|
195
|
+
const studioOriginOverride = process.env.SANITY_STUDIO_ORIGIN ?? repoConfig?.source?.studioOrigin;
|
|
196
|
+
// Report store overrides (W0077 Phase 6e — `--report-dataset` and
|
|
197
|
+
// `--report-project` retired). Resolution order:
|
|
198
|
+
// 1. Environment variables (AILF_REPORT_DATASET, AILF_REPORT_PROJECT_ID)
|
|
199
|
+
// 2. .ailf/config.yaml reportStore block
|
|
200
|
+
// 3. Eval dataset override (so perspective evals publish to the same dataset)
|
|
201
|
+
const reportDataset = process.env.AILF_REPORT_DATASET ??
|
|
202
|
+
repoConfig?.reportStore?.dataset ??
|
|
203
|
+
datasetOverride ??
|
|
204
|
+
undefined;
|
|
205
|
+
const reportProjectId = process.env.AILF_REPORT_PROJECT_ID ??
|
|
206
|
+
repoConfig?.reportStore?.projectId ??
|
|
207
|
+
undefined;
|
|
208
|
+
// Publish polarity (W0077 Phase 4) — auto policy lives in
|
|
209
|
+
// .ailf/config.yaml's `publish.auto` (or env / default). CLI flags and
|
|
210
|
+
// AILF_PUBLISH still override the policy.
|
|
177
211
|
const reportStoreToken = process.env.AILF_REPORT_SANITY_API_TOKEN ?? process.env.SANITY_API_TOKEN;
|
|
178
212
|
const reportStoreConfigured = Boolean(reportStoreToken);
|
|
179
|
-
// Track whether the user explicitly chose --publish or --no-publish.
|
|
180
|
-
// In remote mode, when this is false we omit the field from the API
|
|
181
|
-
// request so the server can apply its own default (publish when jobId
|
|
182
|
-
// is present). Without this, the local smart-default (which checks for
|
|
183
|
-
// a local Sanity token the CLI doesn't have) would send publish:false
|
|
184
|
-
// and suppress server-side report publishing.
|
|
185
213
|
const publishExplicit = opts.publish !== undefined || process.env.AILF_PUBLISH !== undefined;
|
|
214
|
+
const publishAuto = resolvePublishAuto(repoConfig?.publish?.auto);
|
|
186
215
|
let publishEnabled;
|
|
187
216
|
if (opts.publish !== undefined) {
|
|
188
|
-
// Explicit --publish or --no-publish always wins
|
|
189
217
|
publishEnabled = opts.publish;
|
|
190
218
|
}
|
|
191
219
|
else if (process.env.AILF_PUBLISH === "1") {
|
|
@@ -195,36 +223,40 @@ export function computeResolvedOptions(opts) {
|
|
|
195
223
|
publishEnabled = false;
|
|
196
224
|
}
|
|
197
225
|
else {
|
|
198
|
-
//
|
|
199
|
-
|
|
226
|
+
// Apply the auto policy. The report store still has to be configured
|
|
227
|
+
// for `auto: "always"` and `"full-runs"` — without a token, publishing
|
|
228
|
+
// is impossible regardless of policy.
|
|
229
|
+
switch (publishAuto) {
|
|
230
|
+
case "always":
|
|
231
|
+
publishEnabled = reportStoreConfigured;
|
|
232
|
+
break;
|
|
233
|
+
case "never":
|
|
234
|
+
publishEnabled = false;
|
|
235
|
+
break;
|
|
236
|
+
case "full-runs":
|
|
237
|
+
default:
|
|
238
|
+
publishEnabled = reportStoreConfigured && !debugEnabled;
|
|
239
|
+
break;
|
|
240
|
+
}
|
|
200
241
|
}
|
|
201
|
-
//
|
|
202
|
-
|
|
203
|
-
//
|
|
204
|
-
//
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
// 3. .ailf/config.yaml reportStore block (when repo tasks path is set)
|
|
211
|
-
// 4. Eval dataset override (so perspective evals publish to the same dataset)
|
|
212
|
-
const repoConfig = loadRepoConfigIfPresent(resolvedRepoTasksPath);
|
|
213
|
-
const reportDataset = opts.reportDataset ??
|
|
214
|
-
process.env.AILF_REPORT_DATASET ??
|
|
215
|
-
repoConfig?.reportStore?.dataset ??
|
|
216
|
-
datasetOverride ??
|
|
217
|
-
undefined;
|
|
218
|
-
const reportProjectId = opts.reportProject ??
|
|
219
|
-
process.env.AILF_REPORT_PROJECT_ID ??
|
|
220
|
-
repoConfig?.reportStore?.projectId ??
|
|
221
|
-
undefined;
|
|
242
|
+
// Tag default cascade: --publish-tag > AILF_PUBLISH_TAG > .ailf/config.yaml
|
|
243
|
+
const publishTag = opts.publishTag ?? process.env.AILF_PUBLISH_TAG ?? repoConfig?.publish?.tag;
|
|
244
|
+
// Execution-tier resolution (W0077 Phase 6b) — concurrency, grader
|
|
245
|
+
// replications, gap analysis, and api URL all moved from CLI flags to
|
|
246
|
+
// `.ailf/config.yaml`'s `execution` block. Cascade for each:
|
|
247
|
+
// env var (where one exists) > .ailf/config.yaml > built-in default
|
|
248
|
+
const concurrency = repoConfig?.execution?.concurrency;
|
|
249
|
+
const graderReplications = repoConfig?.execution?.graderReplications;
|
|
250
|
+
const gapAnalysisEnabled = repoConfig?.execution?.gapAnalysis ?? true;
|
|
222
251
|
// Remote mode
|
|
223
252
|
const remote = opts.remote || process.env.AILF_REMOTE === "1";
|
|
224
|
-
const apiUrl =
|
|
253
|
+
const apiUrl = process.env.AILF_API_URL ??
|
|
254
|
+
repoConfig?.execution?.apiUrl ??
|
|
255
|
+
"https://ailf-api.sanity.build";
|
|
225
256
|
const apiKey = process.env.AILF_API_KEY ?? undefined;
|
|
226
|
-
// Output directory
|
|
227
|
-
|
|
257
|
+
// Output directory (W0077 Phase 6c) — `output.dir` from .ailf/config.yaml
|
|
258
|
+
// when set, otherwise <cwd>/.ailf/results/latest/.
|
|
259
|
+
const outputDir = resolveOutputDir(repoConfig?.output?.dir);
|
|
228
260
|
return {
|
|
229
261
|
allowedOriginArgs,
|
|
230
262
|
apiKey,
|
|
@@ -232,16 +264,15 @@ export function computeResolvedOptions(opts) {
|
|
|
232
264
|
areaOption,
|
|
233
265
|
beforeOption,
|
|
234
266
|
changedDocsOption,
|
|
235
|
-
compareBaseline
|
|
267
|
+
compareBaseline,
|
|
236
268
|
compareEnabled,
|
|
237
269
|
compareThreshold: opts.threshold,
|
|
238
|
-
concurrency
|
|
270
|
+
concurrency,
|
|
239
271
|
datasetOverride,
|
|
240
272
|
debug,
|
|
241
|
-
discoveryReportEnabled: opts.discoveryReport,
|
|
242
273
|
dryRun: opts.dryRun,
|
|
243
|
-
gapAnalysisEnabled
|
|
244
|
-
graderReplications
|
|
274
|
+
gapAnalysisEnabled,
|
|
275
|
+
graderReplications,
|
|
245
276
|
headerArgs,
|
|
246
277
|
impactSummary,
|
|
247
278
|
mode,
|
|
@@ -256,15 +287,14 @@ export function computeResolvedOptions(opts) {
|
|
|
256
287
|
promptfooUrl: opts.promptfooUrl,
|
|
257
288
|
publishEnabled,
|
|
258
289
|
publishExplicit,
|
|
259
|
-
publishTag
|
|
260
|
-
readinessEnabled: opts.readiness,
|
|
290
|
+
publishTag,
|
|
261
291
|
remote,
|
|
262
292
|
reportDataset,
|
|
263
293
|
reportProjectId,
|
|
264
294
|
sanityDocumentArgs,
|
|
265
295
|
searchMode,
|
|
266
|
-
skipEval: opts.
|
|
267
|
-
skipFetch: opts.
|
|
296
|
+
skipEval: opts.eval === false,
|
|
297
|
+
skipFetch: opts.fetch === false,
|
|
268
298
|
source: opts.source,
|
|
269
299
|
studioOriginOverride,
|
|
270
300
|
repoTasksPath: resolvedRepoTasksPath,
|
|
@@ -272,37 +302,55 @@ export function computeResolvedOptions(opts) {
|
|
|
272
302
|
tagOption,
|
|
273
303
|
taskSourceType: resolvedTaskSourceType,
|
|
274
304
|
urlArgs,
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
305
|
+
// Artifact-writer settings (W0077 Phase 6g) — `--no-artifacts`,
|
|
306
|
+
// `--artifacts-dir`, and `--artifacts-exclude` retired. Cascade:
|
|
307
|
+
// AILF_ARTIFACTS_DIR > .ailf/config.yaml `artifacts.dir` > default
|
|
308
|
+
// .ailf/config.yaml `artifacts.enabled: false` > writers attached
|
|
309
|
+
// .ailf/config.yaml `artifacts.exclude` > no exclusions
|
|
310
|
+
// `--no-artifacts-write` (artifactsDryRun) stays per-run.
|
|
311
|
+
artifactsDisabled: repoConfig?.artifacts?.enabled === false,
|
|
312
|
+
artifactsDir: process.env.AILF_ARTIFACTS_DIR ?? repoConfig?.artifacts?.dir,
|
|
313
|
+
artifactsDryRun: opts.artifactsWrite === false,
|
|
314
|
+
artifactsExclude: repoConfig?.artifacts?.exclude,
|
|
279
315
|
classificationOption: opts.classification?.trim() || undefined,
|
|
280
|
-
|
|
281
|
-
|
|
316
|
+
// Owner attribution (W0077 Phase 6f) — `--owner-team` and
|
|
317
|
+
// `--owner-individual` retired. Cascade: AILF_OWNER_TEAM /
|
|
318
|
+
// AILF_OWNER_INDIVIDUAL env vars > .ailf/config.yaml `owner.*` > undefined.
|
|
319
|
+
// Downstream resolution in build-request.ts already honors the env var as a
|
|
320
|
+
// fallback when this option is unset, but threading it through here keeps
|
|
321
|
+
// the cascade order explicit.
|
|
322
|
+
ownerTeamOption: process.env.AILF_OWNER_TEAM?.trim() ||
|
|
323
|
+
repoConfig?.owner?.team ||
|
|
324
|
+
undefined,
|
|
325
|
+
ownerIndividualOption: process.env.AILF_OWNER_INDIVIDUAL?.trim() ||
|
|
326
|
+
repoConfig?.owner?.individual ||
|
|
327
|
+
undefined,
|
|
282
328
|
purposeOption: opts.purpose?.trim() || undefined,
|
|
283
329
|
labelOptions: opts.label ?? [],
|
|
284
330
|
};
|
|
285
331
|
}
|
|
332
|
+
const PUBLISH_AUTO_VALUES = ["always", "full-runs", "never"];
|
|
286
333
|
/**
|
|
287
|
-
* Resolve the
|
|
288
|
-
*
|
|
289
|
-
*
|
|
290
|
-
*
|
|
334
|
+
* Resolve the publish auto policy. Precedence:
|
|
335
|
+
* 1. .ailf/config.yaml `publish.auto`
|
|
336
|
+
* 2. AILF_PUBLISH_AUTO env var
|
|
337
|
+
* 3. Default: "full-runs" (preserves the historical smart default)
|
|
291
338
|
*
|
|
292
|
-
*
|
|
293
|
-
*
|
|
339
|
+
* Unrecognized env-var values fall through to the default with a warning;
|
|
340
|
+
* the schema validates the config-file value at parse time.
|
|
294
341
|
*/
|
|
295
|
-
function
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
if (
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
.
|
|
305
|
-
|
|
342
|
+
function resolvePublishAuto(repoValue) {
|
|
343
|
+
if (repoValue)
|
|
344
|
+
return repoValue;
|
|
345
|
+
const envValue = process.env.AILF_PUBLISH_AUTO?.trim();
|
|
346
|
+
if (envValue &&
|
|
347
|
+
PUBLISH_AUTO_VALUES.includes(envValue)) {
|
|
348
|
+
return envValue;
|
|
349
|
+
}
|
|
350
|
+
if (envValue) {
|
|
351
|
+
console.warn(`⚠️ AILF_PUBLISH_AUTO="${envValue}" is not recognized; valid values are ${PUBLISH_AUTO_VALUES.join(", ")}. Falling back to "full-runs".`);
|
|
352
|
+
}
|
|
353
|
+
return "full-runs";
|
|
306
354
|
}
|
|
307
355
|
/** Resolve and validate the --task-source flag value. */
|
|
308
356
|
function resolveTaskSourceType(raw) {
|
|
@@ -370,18 +418,11 @@ export async function executePipeline(cliOpts) {
|
|
|
370
418
|
const callerCwd = getCallerCwd();
|
|
371
419
|
const adapter = new FileConfigAdapter(cliOpts.config, ROOT);
|
|
372
420
|
const config = await adapter.resolve();
|
|
373
|
-
//
|
|
374
|
-
//
|
|
375
|
-
//
|
|
376
|
-
//
|
|
377
|
-
if (
|
|
378
|
-
config.repoTasksPath = resolve(callerCwd, cliOpts.repoTasksPath);
|
|
379
|
-
}
|
|
380
|
-
else if (config.taskSourceType === "repo" && !config.repoTasksPath) {
|
|
381
|
-
// Default: when taskSource=repo but no path set, look in .ailf/tasks/
|
|
382
|
-
// (matches the `ailf init` scaffold location). Silent fallback here —
|
|
383
|
-
// composition root will surface a helpful error if the directory is
|
|
384
|
-
// missing.
|
|
421
|
+
// When `taskSource.type` is `repo` and no `repoTasksPath` was set in
|
|
422
|
+
// the config file, fall back to `<callerCwd>/.ailf/tasks/` (the
|
|
423
|
+
// location `ailf init` scaffolds). Silent fallback — composition-root
|
|
424
|
+
// surfaces a helpful error if the directory is missing.
|
|
425
|
+
if (config.taskSourceType === "repo" && !config.repoTasksPath) {
|
|
385
426
|
const defaultPath = resolve(callerCwd, ".ailf", "tasks");
|
|
386
427
|
if (existsSync(defaultPath)) {
|
|
387
428
|
config.repoTasksPath = defaultPath;
|
|
@@ -390,18 +431,13 @@ export async function executePipeline(cliOpts) {
|
|
|
390
431
|
if (cliOpts.output) {
|
|
391
432
|
config.outputPath = resolve(callerCwd, cliOpts.output);
|
|
392
433
|
}
|
|
393
|
-
//
|
|
394
|
-
|
|
395
|
-
//
|
|
396
|
-
//
|
|
397
|
-
|
|
398
|
-
config.
|
|
399
|
-
config.
|
|
400
|
-
config.artifactsDryRun ??= cliOpts.artifactsDryRun;
|
|
401
|
-
const excludeList = parseArtifactsExcludeList(cliOpts.artifactsExclude);
|
|
402
|
-
if (excludeList) {
|
|
403
|
-
config.artifactsExclude = excludeList;
|
|
404
|
-
}
|
|
434
|
+
// Artifact-writer env-var fallbacks. The adapter populates the bulk of
|
|
435
|
+
// the artifact settings from `EvalConfigSchema.artifacts.*` (W0077
|
|
436
|
+
// Phase 6g); we layer the env-var fallbacks here for fields the schema
|
|
437
|
+
// doesn't cover (GCS bucket, upload mode), and the AILF_ARTIFACTS_DIR
|
|
438
|
+
// override that wins over both schema and CLI.
|
|
439
|
+
config.artifactsDir = process.env.AILF_ARTIFACTS_DIR ?? config.artifactsDir;
|
|
440
|
+
config.artifactsDryRun ??= cliOpts.artifactsWrite === false;
|
|
405
441
|
config.artifactGcsBucket ??= process.env.AILF_GCS_ARTIFACT_BUCKET;
|
|
406
442
|
config.artifactUpload ??= parseArtifactUploadEnv(process.env.AILF_ARTIFACT_UPLOAD);
|
|
407
443
|
// Create AppContext directly from the merged config so adapters
|
|
@@ -470,18 +506,16 @@ function writePipelineResult(result, outputDir) {
|
|
|
470
506
|
console.log(` 📄 Pipeline result: ${resultFile}\n`);
|
|
471
507
|
}
|
|
472
508
|
/**
|
|
473
|
-
* Load
|
|
474
|
-
*
|
|
509
|
+
* Load `<cwd>/.ailf/config.yaml` if it exists. Returns null when the file
|
|
510
|
+
* is absent or unparseable.
|
|
475
511
|
*
|
|
476
|
-
*
|
|
477
|
-
*
|
|
478
|
-
*
|
|
512
|
+
* Auto-loads regardless of `--task-source`: the same `.ailf/config.yaml` is
|
|
513
|
+
* the per-environment configuration home for every run (W0077 Phase 6a).
|
|
514
|
+
* Subsequent flag-family migrations (6b–6h) read additional fields from
|
|
515
|
+
* this same file via the same loader.
|
|
479
516
|
*/
|
|
480
|
-
function loadRepoConfigIfPresent(
|
|
481
|
-
|
|
482
|
-
return null;
|
|
483
|
-
// .ailf/tasks/ → .ailf/config.yaml
|
|
484
|
-
const configPath = resolve(repoTasksPath, "..", "config.yaml");
|
|
517
|
+
function loadRepoConfigIfPresent(cwd) {
|
|
518
|
+
const configPath = resolve(cwd, ".ailf", "config.yaml");
|
|
485
519
|
if (!existsSync(configPath))
|
|
486
520
|
return null;
|
|
487
521
|
try {
|
|
@@ -16,7 +16,7 @@ const ROOT = resolve(__dirname, "..", "..");
|
|
|
16
16
|
export function createPrCommentCommand() {
|
|
17
17
|
const cmd = new Command("pr-comment")
|
|
18
18
|
.description("Generate a markdown PR comment from evaluation scores")
|
|
19
|
-
.option("--output <path>", "Write comment to file (default: stdout)")
|
|
19
|
+
.option("-o, --output <path>", "Write comment to file (default: stdout)")
|
|
20
20
|
.option("--promptfoo-url <url>", "Promptfoo share URL to include")
|
|
21
21
|
.action(async (opts) => {
|
|
22
22
|
try {
|
|
@@ -29,8 +29,6 @@ export function createPrCommentCommand() {
|
|
|
29
29
|
skipEval: true,
|
|
30
30
|
compareEnabled: false,
|
|
31
31
|
gapAnalysisEnabled: false,
|
|
32
|
-
readinessEnabled: false,
|
|
33
|
-
discoveryReportEnabled: false,
|
|
34
32
|
publishEnabled: false,
|
|
35
33
|
noCache: true,
|
|
36
34
|
noRemoteCache: true,
|