@sanity/ailf 3.6.0 → 3.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
- package/config/thresholds.ts +3 -3
- package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
- package/dist/_vendor/ailf-core/examples/index.js +2 -2
- package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
- package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
- package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
- package/dist/_vendor/ailf-shared/run-classification.js +1 -1
- package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
- package/dist/adapters/api-client/build-request.d.ts +0 -2
- package/dist/adapters/api-client/build-request.js +2 -6
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
- package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
- package/dist/adapters/config-sources/file-config-adapter.js +38 -12
- package/dist/adapters/task-sources/content-lake-task-source.js +17 -0
- package/dist/adapters/task-sources/index.d.ts +1 -1
- package/dist/adapters/task-sources/index.js +1 -1
- package/dist/adapters/task-sources/repo-schemas.d.ts +154 -0
- package/dist/adapters/task-sources/repo-schemas.js +137 -0
- package/dist/cli.d.ts +2 -2
- package/dist/cli.js +134 -38
- package/dist/commands/agent-report.js +1 -1
- package/dist/commands/calculate-scores.js +0 -2
- package/dist/commands/check-staleness.js +1 -1
- package/dist/commands/chronic-failures.js +4 -4
- package/dist/commands/coverage-audit.js +6 -7
- package/dist/commands/discovery-report.js +16 -4
- package/dist/commands/eval.d.ts +1 -1
- package/dist/commands/eval.js +1 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +13 -44
- package/dist/commands/fetch-docs.js +0 -2
- package/dist/commands/generate-configs.js +0 -2
- package/dist/commands/grader/index.js +3 -3
- package/dist/commands/init.d.ts +2 -2
- package/dist/commands/init.js +10 -9
- package/dist/commands/interactive.d.ts +1 -1
- package/dist/commands/interactive.js +8 -8
- package/dist/commands/pipeline-action.d.ts +1 -3
- package/dist/commands/pipeline-action.js +174 -140
- package/dist/commands/pr-comment.js +1 -3
- package/dist/commands/publish.d.ts +1 -1
- package/dist/commands/publish.js +2 -4
- package/dist/commands/readiness-report.js +17 -8
- package/dist/commands/remote-pipeline.d.ts +1 -1
- package/dist/commands/remote-pipeline.js +1 -3
- package/dist/commands/run.d.ts +64 -0
- package/dist/commands/{pipeline.js → run.js} +19 -30
- package/dist/commands/shared/help.js +4 -4
- package/dist/commands/shared/options.d.ts +29 -3
- package/dist/commands/shared/options.js +37 -13
- package/dist/commands/validate-tasks.js +1 -1
- package/dist/commands/validate.d.ts +1 -1
- package/dist/commands/validate.js +2 -2
- package/dist/commands/weekly-digest.js +3 -3
- package/dist/config/thresholds.ts +3 -3
- package/dist/orchestration/build-app-context.js +0 -2
- package/dist/orchestration/build-step-sequence.js +1 -11
- package/dist/orchestration/steps/fetch-docs-step.js +1 -1
- package/dist/orchestration/steps/index.d.ts +0 -2
- package/dist/orchestration/steps/index.js +0 -2
- package/dist/orchestration/steps/run-eval-step.js +1 -1
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/map-request-to-config.js +0 -2
- package/dist/pipeline/plan.d.ts +2 -4
- package/dist/pipeline/plan.js +4 -32
- package/dist/pipeline/run-context.d.ts +1 -1
- package/dist/pipeline/run-context.js +4 -4
- package/dist/pipeline/validate.d.ts +1 -1
- package/dist/pipeline/validate.js +1 -1
- package/package.json +7 -7
- package/dist/commands/pipeline.d.ts +0 -77
- package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
- package/dist/orchestration/steps/discovery-report-step.js +0 -62
- package/dist/orchestration/steps/readiness-step.d.ts +0 -13
- package/dist/orchestration/steps/readiness-step.js +0 -98
package/dist/cli.js
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
* appends Quick Start examples.
|
|
14
14
|
*
|
|
15
15
|
* Usage:
|
|
16
|
-
* ailf
|
|
16
|
+
* ailf run [flags] # full evaluation run
|
|
17
17
|
* ailf compare [flags] # compare evaluation runs
|
|
18
18
|
* ailf baseline <cmd> [flags] # baseline management
|
|
19
19
|
* ailf validate [flags] # config validation
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
* --dotenv <path> # override default .env path
|
|
27
27
|
*
|
|
28
28
|
* Dev mode (without building):
|
|
29
|
-
* tsx src/cli.ts
|
|
29
|
+
* tsx src/cli.ts run --debug
|
|
30
30
|
*/
|
|
31
31
|
import { config as dotenvConfig } from "dotenv";
|
|
32
32
|
import { existsSync, readFileSync } from "fs";
|
|
@@ -76,22 +76,75 @@ else if (process.argv.includes("--quiet") || process.argv.includes("-q")) {
|
|
|
76
76
|
process.env.AILF_LOG_LEVEL = "quiet";
|
|
77
77
|
}
|
|
78
78
|
// ---------------------------------------------------------------------------
|
|
79
|
-
//
|
|
80
|
-
// --------------------------------------------------------------------------
|
|
81
|
-
// The legacy collector has been removed. Callers still using
|
|
82
|
-
// --capture / --capture-dir / --no-capture-compress / --no-capture-extras
|
|
83
|
-
// or AILF_CAPTURE* / AILF_LEGACY_COLLECTOR / AILF_UNIFIED_ARTIFACTS must
|
|
84
|
-
// migrate to --artifacts-dir / --no-artifacts / --artifacts-exclude. We
|
|
85
|
-
// print a clear pointer so failures don't bubble up as opaque "unknown
|
|
86
|
-
// option" errors from Commander.
|
|
79
|
+
// Hard-error on retired flags, env vars, and commands with a migration hint.
|
|
87
80
|
// ---------------------------------------------------------------------------
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
81
|
+
// Each entry maps an old identifier to the message shown when it's seen, so
|
|
82
|
+
// failures don't bubble up as opaque "unknown option" errors from Commander.
|
|
83
|
+
// W0052 retired the legacy artifact collector; W0075 retired the --skip-*
|
|
84
|
+
// negation prefix, the --debug-{n,pattern,sample} filter flags, and several
|
|
85
|
+
// top-level report/validator commands that were consolidated into umbrellas.
|
|
86
|
+
// ---------------------------------------------------------------------------
|
|
87
|
+
const RETIRED_CAPTURE_HINT = " Use --artifacts-dir / --no-artifacts / --artifacts-exclude instead.\n" +
|
|
88
|
+
" See docs/guides/cli-guide.md and docs/decisions/D0033-unified-run-anchored-artifact-capture.md.";
|
|
89
|
+
const RETIRED_FLAG_HINTS = {
|
|
90
|
+
"--capture": RETIRED_CAPTURE_HINT,
|
|
91
|
+
"--capture-dir": RETIRED_CAPTURE_HINT,
|
|
92
|
+
"--no-capture-compress": RETIRED_CAPTURE_HINT,
|
|
93
|
+
"--no-capture-extras": RETIRED_CAPTURE_HINT,
|
|
94
|
+
"--capture-exclude": RETIRED_CAPTURE_HINT,
|
|
95
|
+
"--skip-fetch": " Use --no-fetch instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
96
|
+
"--skip-eval": " Use --no-eval instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
97
|
+
"--debug-n": " Use --filter-first-n instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
98
|
+
"--debug-pattern": " Use --filter-pattern instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
99
|
+
"--debug-sample": " Use --filter-sample instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
100
|
+
"--output-format": " Use --format instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
101
|
+
"--artifacts-dry-run": " Use --no-artifacts-write instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
102
|
+
"--readiness": " Use `ailf report readiness --from-run <path>` instead. See docs/design-docs/pipeline-command-surface.md (W0077).",
|
|
103
|
+
"--discovery-report": " Use `ailf report discovery --from-run <path>` instead. See docs/design-docs/pipeline-command-surface.md (W0077).",
|
|
104
|
+
"--compare-baseline": " Use `--compare <path>` instead. `--compare` now takes an optional baseline argument. See docs/design-docs/pipeline-command-surface.md (W0077).",
|
|
105
|
+
"--before": " Use --before-source instead. The flag was renamed to disambiguate from baseline comparison. See docs/design-docs/pipeline-command-surface.md (W0077).",
|
|
106
|
+
"--concurrency": " Set `execution.concurrency` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6b).",
|
|
107
|
+
"--grader-replications": " Set `execution.graderReplications` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6b).",
|
|
108
|
+
"--no-gap-analysis": " Set `execution.gapAnalysis: false` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6b).",
|
|
109
|
+
"--api-url": " Set `execution.apiUrl` in .ailf/config.yaml or use the AILF_API_URL env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6b).",
|
|
110
|
+
"--report-dataset": " Set `reportStore.dataset` in .ailf/config.yaml or use the AILF_REPORT_DATASET env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6e).",
|
|
111
|
+
"--report-project": " Set `reportStore.projectId` in .ailf/config.yaml or use the AILF_REPORT_PROJECT_ID env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6e).",
|
|
112
|
+
"--owner-team": " Set `owner.team` in .ailf/config.yaml or use the AILF_OWNER_TEAM env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6f).",
|
|
113
|
+
"--owner-individual": " Set `owner.individual` in .ailf/config.yaml or use the AILF_OWNER_INDIVIDUAL env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6f).",
|
|
114
|
+
"--header": " Set `agentic.headers` (key/value object) in .ailf/config.yaml or use the DOC_HEADERS env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6f).",
|
|
115
|
+
"--allowed-origin": " Set `agentic.allowedOrigins` (list of globs) in .ailf/config.yaml or use the DOC_ALLOWED_ORIGINS env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6f).",
|
|
116
|
+
"--task-source": " Set `taskSource.type` (content-lake | repo) in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6h).",
|
|
117
|
+
"--repo-tasks-path": " Set `taskSource.repoTasksPath` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6h).",
|
|
118
|
+
};
|
|
119
|
+
const RETIRED_COMMAND_HINTS = {
|
|
120
|
+
pipeline: " Use `ailf run` instead. See docs/design-docs/pipeline-command-surface.md (W0077).",
|
|
121
|
+
"validate-tasks": " Use `ailf validate tasks` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
122
|
+
"readiness-report": " Use `ailf report readiness` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
123
|
+
"chronic-failures": " Use `ailf report chronic-failures` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
124
|
+
"coverage-audit": " Use `ailf report coverage` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
125
|
+
"discovery-report": " Use `ailf report discovery` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
126
|
+
"agent-report": " Use `ailf report agent` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
127
|
+
"weekly-digest": " Use `ailf report digest` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
128
|
+
"check-staleness": " Use `ailf report staleness` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
|
|
129
|
+
};
|
|
130
|
+
/**
|
|
131
|
+
* Per-subcommand retired-flag hints. Use this for flags that were retired
|
|
132
|
+
* from one subcommand but still exist on others (e.g. `--output-dir` is
|
|
133
|
+
* retired from `ailf run` but still present on `ailf publish`,
|
|
134
|
+
* `ailf pr-comment`, etc.). Keys are subcommand names; values share the
|
|
135
|
+
* same shape as `RETIRED_FLAG_HINTS`.
|
|
136
|
+
*/
|
|
137
|
+
const RETIRED_FLAG_HINTS_BY_COMMAND = {
|
|
138
|
+
run: {
|
|
139
|
+
"--output-dir": " Set `output.dir` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6c).",
|
|
140
|
+
"--sanity-dataset": " Set `source.dataset` in .ailf/config.yaml or use the SANITY_DATASET env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6d).",
|
|
141
|
+
"--sanity-project": " Set `source.projectId` in .ailf/config.yaml or use the SANITY_PROJECT_ID env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6d).",
|
|
142
|
+
"--sanity-studio-origin": " Set `source.studioOrigin` in .ailf/config.yaml or use the SANITY_STUDIO_ORIGIN env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6d).",
|
|
143
|
+
"--no-artifacts": " Set `artifacts.enabled: false` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6g).",
|
|
144
|
+
"--artifacts-dir": " Set `artifacts.dir` in .ailf/config.yaml or use the AILF_ARTIFACTS_DIR env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6g).",
|
|
145
|
+
"--artifacts-exclude": " Set `artifacts.exclude` (list of artifact-type names) in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6g).",
|
|
146
|
+
},
|
|
147
|
+
};
|
|
95
148
|
const RETIRED_ENV_VARS = [
|
|
96
149
|
"AILF_CAPTURE",
|
|
97
150
|
"AILF_CAPTURE_DIR",
|
|
@@ -102,15 +155,45 @@ const RETIRED_ENV_VARS = [
|
|
|
102
155
|
"AILF_LEGACY_COLLECTOR",
|
|
103
156
|
"AILF_UNIFIED_ARTIFACTS",
|
|
104
157
|
];
|
|
158
|
+
/**
|
|
159
|
+
* Identify the subcommand the user invoked — the first non-flag arg after
|
|
160
|
+
* `ailf` (argv[0]=node, argv[1]=cli.ts). Returns undefined if none.
|
|
161
|
+
*/
|
|
162
|
+
function findInvokedSubcommand() {
|
|
163
|
+
for (let i = 2; i < process.argv.length; i++) {
|
|
164
|
+
const arg = process.argv[i];
|
|
165
|
+
if (!arg.startsWith("-"))
|
|
166
|
+
return arg;
|
|
167
|
+
}
|
|
168
|
+
return undefined;
|
|
169
|
+
}
|
|
105
170
|
function findRetiredFlag() {
|
|
171
|
+
const subcommand = findInvokedSubcommand();
|
|
172
|
+
const subcommandHints = subcommand
|
|
173
|
+
? RETIRED_FLAG_HINTS_BY_COMMAND[subcommand]
|
|
174
|
+
: undefined;
|
|
106
175
|
for (const arg of process.argv) {
|
|
107
176
|
const bare = arg.split("=")[0];
|
|
108
|
-
if (
|
|
109
|
-
return bare;
|
|
177
|
+
if (subcommandHints && bare in subcommandHints) {
|
|
178
|
+
return { flag: bare, hint: subcommandHints[bare] };
|
|
179
|
+
}
|
|
180
|
+
if (bare in RETIRED_FLAG_HINTS) {
|
|
181
|
+
return { flag: bare, hint: RETIRED_FLAG_HINTS[bare] };
|
|
110
182
|
}
|
|
111
183
|
}
|
|
112
184
|
return undefined;
|
|
113
185
|
}
|
|
186
|
+
function findRetiredCommand() {
|
|
187
|
+
// The first non-flag argument after `ailf` (argv[0]=node, argv[1]=cli.ts).
|
|
188
|
+
for (let i = 2; i < process.argv.length; i++) {
|
|
189
|
+
const arg = process.argv[i];
|
|
190
|
+
if (!arg.startsWith("-") && arg in RETIRED_COMMAND_HINTS)
|
|
191
|
+
return arg;
|
|
192
|
+
if (!arg.startsWith("-"))
|
|
193
|
+
return undefined;
|
|
194
|
+
}
|
|
195
|
+
return undefined;
|
|
196
|
+
}
|
|
114
197
|
function findRetiredEnv() {
|
|
115
198
|
for (const name of RETIRED_ENV_VARS) {
|
|
116
199
|
if (process.env[name] !== undefined)
|
|
@@ -119,14 +202,21 @@ function findRetiredEnv() {
|
|
|
119
202
|
return undefined;
|
|
120
203
|
}
|
|
121
204
|
const retiredFlag = findRetiredFlag();
|
|
205
|
+
const retiredCommand = findRetiredCommand();
|
|
122
206
|
const retiredEnv = findRetiredEnv();
|
|
123
|
-
if (retiredFlag || retiredEnv) {
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
207
|
+
if (retiredFlag || retiredCommand || retiredEnv) {
|
|
208
|
+
if (retiredFlag) {
|
|
209
|
+
console.error(`❌ flag "${retiredFlag.flag}" was retired.`);
|
|
210
|
+
console.error(retiredFlag.hint);
|
|
211
|
+
}
|
|
212
|
+
else if (retiredCommand) {
|
|
213
|
+
console.error(`❌ command "${retiredCommand}" was retired.`);
|
|
214
|
+
console.error(RETIRED_COMMAND_HINTS[retiredCommand]);
|
|
215
|
+
}
|
|
216
|
+
else if (retiredEnv) {
|
|
217
|
+
console.error(`❌ environment variable "${retiredEnv}" was retired in W0052 along with the legacy artifact collector.`);
|
|
218
|
+
console.error(RETIRED_CAPTURE_HINT);
|
|
219
|
+
}
|
|
130
220
|
process.exit(2);
|
|
131
221
|
}
|
|
132
222
|
// ---------------------------------------------------------------------------
|
|
@@ -180,8 +270,8 @@ program.hook("preAction", async (thisCommand, actionCommand) => {
|
|
|
180
270
|
// Within each group, commands appear in the order they are added.
|
|
181
271
|
// ---------------------------------------------------------------------------
|
|
182
272
|
// ── Core Workflow ──────────────────────────────────────────────────────
|
|
183
|
-
import {
|
|
184
|
-
program.addCommand(
|
|
273
|
+
import { createRunCommand } from "./commands/run.js";
|
|
274
|
+
program.addCommand(createRunCommand().helpGroup(CommandGroup.CoreWorkflow));
|
|
185
275
|
import { createCompareCommand } from "./commands/compare.js";
|
|
186
276
|
program.addCommand(createCompareCommand().helpGroup(CommandGroup.CoreWorkflow));
|
|
187
277
|
import { createBaselineCommand } from "./commands/baseline.js";
|
|
@@ -192,29 +282,35 @@ import { createRunsCommand } from "./commands/runs.js";
|
|
|
192
282
|
program.addCommand(createRunsCommand().helpGroup(CommandGroup.CoreWorkflow));
|
|
193
283
|
// ── Analysis & Reports ────────────────────────────────────────────────
|
|
194
284
|
import { createReadinessReportCommand } from "./commands/readiness-report.js";
|
|
195
|
-
program.addCommand(createReadinessReportCommand().helpGroup(CommandGroup.AnalysisReports));
|
|
196
285
|
import { createChronicFailuresCommand } from "./commands/chronic-failures.js";
|
|
197
|
-
program.addCommand(createChronicFailuresCommand().helpGroup(CommandGroup.AnalysisReports));
|
|
198
286
|
import { createCoverageAuditCommand } from "./commands/coverage-audit.js";
|
|
199
|
-
program.addCommand(createCoverageAuditCommand().helpGroup(CommandGroup.AnalysisReports));
|
|
200
287
|
import { createDiscoveryReportCommand } from "./commands/discovery-report.js";
|
|
201
|
-
program.addCommand(createDiscoveryReportCommand().helpGroup(CommandGroup.AnalysisReports));
|
|
202
288
|
import { createAgentReportCommand } from "./commands/agent-report.js";
|
|
203
|
-
program.addCommand(createAgentReportCommand().helpGroup(CommandGroup.AnalysisReports));
|
|
204
289
|
import { createWeeklyDigestCommand } from "./commands/weekly-digest.js";
|
|
205
|
-
program.addCommand(createWeeklyDigestCommand().helpGroup(CommandGroup.AnalysisReports));
|
|
206
290
|
import { createCheckStalenessCommand } from "./commands/check-staleness.js";
|
|
207
|
-
|
|
291
|
+
const reportCommand = new Command("report")
|
|
292
|
+
.description("Generate analysis and reporting outputs from evaluation runs")
|
|
293
|
+
.addCommand(createReadinessReportCommand())
|
|
294
|
+
.addCommand(createChronicFailuresCommand())
|
|
295
|
+
.addCommand(createCoverageAuditCommand())
|
|
296
|
+
.addCommand(createDiscoveryReportCommand())
|
|
297
|
+
.addCommand(createAgentReportCommand())
|
|
298
|
+
.addCommand(createWeeklyDigestCommand())
|
|
299
|
+
.addCommand(createCheckStalenessCommand());
|
|
300
|
+
program.addCommand(reportCommand.helpGroup(CommandGroup.AnalysisReports));
|
|
208
301
|
// ── Grader Reliability ────────────────────────────────────────────────
|
|
209
302
|
import { createGraderCommand } from "./commands/grader/index.js";
|
|
210
303
|
program.addCommand(createGraderCommand().helpGroup(CommandGroup.GraderReliability));
|
|
211
304
|
// ── Setup & Configuration ─────────────────────────────────────────────
|
|
212
305
|
import { createInitCommand } from "./commands/init.js";
|
|
213
306
|
program.addCommand(createInitCommand().helpGroup(CommandGroup.SetupConfig));
|
|
214
|
-
import {
|
|
215
|
-
program.addCommand(createValidateCommand().helpGroup(CommandGroup.SetupConfig));
|
|
307
|
+
import { createValidateConfigCommand } from "./commands/validate.js";
|
|
216
308
|
import { createValidateTasksCommand } from "./commands/validate-tasks.js";
|
|
217
|
-
|
|
309
|
+
const validateCommand = new Command("validate")
|
|
310
|
+
.description("Validate AILF configuration and task files")
|
|
311
|
+
.addCommand(createValidateConfigCommand())
|
|
312
|
+
.addCommand(createValidateTasksCommand());
|
|
313
|
+
program.addCommand(validateCommand.helpGroup(CommandGroup.SetupConfig));
|
|
218
314
|
import { createFetchDocsCommand } from "./commands/fetch-docs.js";
|
|
219
315
|
program.addCommand(createFetchDocsCommand().helpGroup(CommandGroup.SetupConfig));
|
|
220
316
|
import { createCacheCommand } from "./commands/cache.js";
|
|
@@ -6,7 +6,7 @@ import { dirname, join } from "path";
|
|
|
6
6
|
import { Command } from "commander";
|
|
7
7
|
import { analyzeResults } from "../pipeline/agent-behavior-report.js";
|
|
8
8
|
export function createAgentReportCommand() {
|
|
9
|
-
return new Command("agent
|
|
9
|
+
return new Command("agent")
|
|
10
10
|
.description("Generate an agent behavior observation report from eval results")
|
|
11
11
|
.argument("[results-path]", "Path to eval-results.json (default: results/latest/eval-results.json)")
|
|
12
12
|
.action(async (resultsPath) => {
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
*/
|
|
13
13
|
import { Command } from "commander";
|
|
14
14
|
export function createCheckStalenessCommand() {
|
|
15
|
-
return new Command("
|
|
15
|
+
return new Command("staleness")
|
|
16
16
|
.description("Exit 1 if no evaluation report has been produced within the max-age window")
|
|
17
17
|
.option("--max-age <days>", "Max age in days before reports are considered stale", (v) => Number.parseInt(v, 10), 3)
|
|
18
18
|
.action(async (opts) => {
|
|
@@ -11,15 +11,15 @@ export function createChronicFailuresCommand() {
|
|
|
11
11
|
return new Command("chronic-failures")
|
|
12
12
|
.description("Identify tasks that error in >50% of recent evaluation runs")
|
|
13
13
|
.option("--lookback <n>", "Number of recent reports to analyze", (v) => parseInt(v, 10), 10)
|
|
14
|
-
.option("--
|
|
15
|
-
.option("--
|
|
14
|
+
.option("--error-rate <n>", "Error rate threshold (0-1) for chronic classification", (v) => parseFloat(v), 0.5)
|
|
15
|
+
.option("-f, --format <fmt>", "Output format: console or json", "console")
|
|
16
16
|
.action(async (opts) => {
|
|
17
17
|
const reportStore = new ReportStore();
|
|
18
18
|
const report = await detectChronicFailures(reportStore, {
|
|
19
19
|
lookback: opts.lookback,
|
|
20
|
-
threshold: opts.
|
|
20
|
+
threshold: opts.errorRate,
|
|
21
21
|
});
|
|
22
|
-
if (opts.json) {
|
|
22
|
+
if (opts.format === "json") {
|
|
23
23
|
console.log(JSON.stringify(report, null, 2));
|
|
24
24
|
}
|
|
25
25
|
else {
|
|
@@ -13,10 +13,9 @@ import { createSanityLiteracyPreset } from "../pipeline/compiler/presets/index.j
|
|
|
13
13
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
14
14
|
const ROOT = resolve(__dirname, "..", "..");
|
|
15
15
|
export function createCoverageAuditCommand() {
|
|
16
|
-
return new Command("coverage
|
|
16
|
+
return new Command("coverage")
|
|
17
17
|
.description("Run documentation coverage audit against feature registry")
|
|
18
|
-
.option("--format <fmt>", "Output format: table, md, markdown")
|
|
19
|
-
.option("--json", "Output raw JSON", false)
|
|
18
|
+
.option("-f, --format <fmt>", "Output format: table, md, markdown, json", "table")
|
|
20
19
|
.action(async (opts) => {
|
|
21
20
|
// Build a registry with mode base + preset so coverage audit works
|
|
22
21
|
// even when config/features.ts is empty (preset is source of truth).
|
|
@@ -28,17 +27,17 @@ export function createCoverageAuditCommand() {
|
|
|
28
27
|
console.error("❌ Coverage audit failed. Ensure config/features.yaml exists and is valid.");
|
|
29
28
|
process.exit(1);
|
|
30
29
|
}
|
|
31
|
-
|
|
30
|
+
const isMarkdown = opts.format === "md" || opts.format === "markdown";
|
|
31
|
+
if (opts.format === "json") {
|
|
32
32
|
console.log(JSON.stringify(report, null, 2));
|
|
33
33
|
}
|
|
34
|
-
else if (
|
|
34
|
+
else if (isMarkdown) {
|
|
35
35
|
console.log(formatCoverageMarkdown(report));
|
|
36
36
|
}
|
|
37
37
|
else {
|
|
38
38
|
console.log(formatCoverageConsole(report));
|
|
39
39
|
}
|
|
40
|
-
|
|
41
|
-
if (!opts.json && opts.format !== "md" && opts.format !== "markdown") {
|
|
40
|
+
if (opts.format !== "json" && !isMarkdown) {
|
|
42
41
|
const docStats = countReferencedDocs(ROOT);
|
|
43
42
|
console.log("DOCUMENT UTILIZATION:");
|
|
44
43
|
console.log(` ${docStats.total} unique document slugs referenced across evaluation tasks`);
|
|
@@ -13,17 +13,29 @@ import { fileURLToPath } from "url";
|
|
|
13
13
|
import { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../pipeline/discovery-report.js";
|
|
14
14
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
15
15
|
const ROOT = resolve(__dirname, "..", "..");
|
|
16
|
+
const DEFAULT_RESULTS_DIR = join(ROOT, "results", "latest");
|
|
17
|
+
/** Resolve `--from-run` to an absolute path to score-summary.json. */
|
|
18
|
+
function resolveFromRun(value) {
|
|
19
|
+
if (value === "latest")
|
|
20
|
+
return join(DEFAULT_RESULTS_DIR, "score-summary.json");
|
|
21
|
+
// If the value points at a directory, look for score-summary.json inside it.
|
|
22
|
+
// Otherwise treat it as a direct file path.
|
|
23
|
+
const resolved = resolve(value);
|
|
24
|
+
return resolved.endsWith(".json")
|
|
25
|
+
? resolved
|
|
26
|
+
: join(resolved, "score-summary.json");
|
|
27
|
+
}
|
|
16
28
|
export function createDiscoveryReportCommand() {
|
|
17
|
-
return new Command("discovery
|
|
29
|
+
return new Command("discovery")
|
|
18
30
|
.description("Generate agent discoverability report from retrieval metrics")
|
|
19
31
|
.option("-a, --area <areas>", "Filter by feature areas (comma-separated)")
|
|
20
32
|
.option("-o, --output <path>", "Write markdown to file instead of stdout")
|
|
21
|
-
.option("-
|
|
33
|
+
.option("--from-run <path>", "Results to read from (`latest`, a results directory, or a direct path to score-summary.json)", "latest")
|
|
22
34
|
.action(async (opts) => {
|
|
23
|
-
const summaryPath = opts.
|
|
35
|
+
const summaryPath = resolveFromRun(opts.fromRun);
|
|
24
36
|
if (!existsSync(summaryPath)) {
|
|
25
37
|
console.error(`❌ Score summary not found: ${summaryPath}`);
|
|
26
|
-
console.error("Run an agentic evaluation first:
|
|
38
|
+
console.error("Run an agentic evaluation first: ailf run --mode agentic");
|
|
27
39
|
process.exit(1);
|
|
28
40
|
}
|
|
29
41
|
const summary = JSON.parse(readFileSync(summaryPath, "utf-8"));
|
package/dist/commands/eval.d.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* eval command — run promptfoo evaluation directly (passthrough).
|
|
3
3
|
*
|
|
4
4
|
* This is a convenience command that forwards to `promptfoo eval`.
|
|
5
|
-
* For most use cases, prefer `ailf
|
|
5
|
+
* For most use cases, prefer `ailf run` which handles the full
|
|
6
6
|
* lifecycle (fetch → generate → eval → score → report).
|
|
7
7
|
*/
|
|
8
8
|
import { Command } from "commander";
|
package/dist/commands/eval.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* eval command — run promptfoo evaluation directly (passthrough).
|
|
3
3
|
*
|
|
4
4
|
* This is a convenience command that forwards to `promptfoo eval`.
|
|
5
|
-
* For most use cases, prefer `ailf
|
|
5
|
+
* For most use cases, prefer `ailf run` which handles the full
|
|
6
6
|
* lifecycle (fetch → generate → eval → score → report).
|
|
7
7
|
*/
|
|
8
8
|
import { Command } from "commander";
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
* Every command is registered in `EXPLAIN_REGISTRY` with either:
|
|
13
13
|
* - **Static metadata** — description, filesRead, filesCreated, steps
|
|
14
14
|
* - **A builder function** — for commands that need to inspect CLI options
|
|
15
|
-
* or perform async work (e.g.,
|
|
15
|
+
* or perform async work (e.g., run, init)
|
|
16
16
|
*
|
|
17
17
|
* Adding --explain support for a new command = adding one registry entry.
|
|
18
18
|
* Commands not in the registry fall back to a minimal generic plan.
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
* Every command is registered in `EXPLAIN_REGISTRY` with either:
|
|
13
13
|
* - **Static metadata** — description, filesRead, filesCreated, steps
|
|
14
14
|
* - **A builder function** — for commands that need to inspect CLI options
|
|
15
|
-
* or perform async work (e.g.,
|
|
15
|
+
* or perform async work (e.g., run, init)
|
|
16
16
|
*
|
|
17
17
|
* Adding --explain support for a new command = adding one registry entry.
|
|
18
18
|
* Commands not in the registry fall back to a minimal generic plan.
|
|
@@ -39,7 +39,7 @@ const EXPLAIN_REGISTRY = {
|
|
|
39
39
|
// ── Dynamic builders (inspect CLI options) ────────────────────────
|
|
40
40
|
baseline: buildBaselineExplainPlan,
|
|
41
41
|
init: buildInitExplainPlan,
|
|
42
|
-
|
|
42
|
+
run: buildPipelineExplainPlan,
|
|
43
43
|
// ── Static metadata ───────────────────────────────────────────────
|
|
44
44
|
"agent-report": {
|
|
45
45
|
description: "Generate an agent behavior observation report from eval results",
|
|
@@ -588,15 +588,11 @@ export async function handleExplain(actionCommand, confirmExecution, rootDir) {
|
|
|
588
588
|
* Build a plan for the `init` command.
|
|
589
589
|
*
|
|
590
590
|
* Shows which files and directories will be created, taking into
|
|
591
|
-
* account the --
|
|
591
|
+
* account the --format and --path flags.
|
|
592
592
|
*/
|
|
593
593
|
function buildInitExplainPlan(actionCommand, rootDir) {
|
|
594
594
|
const opts = actionCommand.opts();
|
|
595
|
-
const format = opts.
|
|
596
|
-
? "json"
|
|
597
|
-
: opts.outputFormat === "yaml"
|
|
598
|
-
? "yaml"
|
|
599
|
-
: "ts";
|
|
595
|
+
const format = opts.format === "json" ? "json" : opts.format === "yaml" ? "yaml" : "ts";
|
|
600
596
|
const taskExt = format === "ts" ? ".task.ts" : format === "yaml" ? ".yaml" : ".json";
|
|
601
597
|
const configFile = format === "ts"
|
|
602
598
|
? "ailf.config.ts"
|
|
@@ -667,69 +663,44 @@ function buildBaselineExplainPlan(actionCommand, rootDir) {
|
|
|
667
663
|
});
|
|
668
664
|
}
|
|
669
665
|
/**
|
|
670
|
-
* Build a plan for the `
|
|
666
|
+
* Build a plan for the `run` command — the richest plan with steps,
|
|
671
667
|
* tasks, models, cost estimates, and cache predictions.
|
|
672
668
|
*/
|
|
673
669
|
async function buildPipelineExplainPlan(actionCommand, rootDir) {
|
|
674
670
|
const raw = actionCommand.opts();
|
|
675
671
|
// Merge Commander-parsed opts with safe defaults for array/boolean fields
|
|
676
672
|
const withDefaults = {
|
|
677
|
-
allowedOrigin: raw.allowedOrigin ?? [],
|
|
678
|
-
allowedOrigins: raw.allowedOrigins ?? [],
|
|
679
673
|
area: raw.area,
|
|
680
674
|
autoScope: raw.autoScope ?? true,
|
|
681
|
-
|
|
675
|
+
beforeSource: raw.beforeSource,
|
|
682
676
|
cache: raw.cache ?? true,
|
|
683
677
|
changedDocs: raw.changedDocs,
|
|
684
|
-
compare: raw.compare
|
|
685
|
-
compareBaseline: raw.compareBaseline,
|
|
686
|
-
concurrency: raw.concurrency,
|
|
678
|
+
compare: raw.compare,
|
|
687
679
|
debug: raw.debug ?? false,
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
discoveryReport: raw.discoveryReport ?? false,
|
|
680
|
+
filterFirstN: raw.filterFirstN,
|
|
681
|
+
filterPattern: raw.filterPattern,
|
|
682
|
+
filterSample: raw.filterSample,
|
|
692
683
|
dryRun: raw.dryRun ?? false,
|
|
693
|
-
gapAnalysis: raw.gapAnalysis ?? true,
|
|
694
|
-
graderReplications: raw.graderReplications,
|
|
695
|
-
header: raw.header ?? [],
|
|
696
|
-
headers: raw.headers ?? [],
|
|
697
684
|
mode: raw.mode ?? LiteracyVariant.FULL,
|
|
698
685
|
output: raw.output,
|
|
699
686
|
promptfooUrl: raw.promptfooUrl,
|
|
700
687
|
publish: raw.publish,
|
|
701
688
|
publishTag: raw.publishTag,
|
|
702
|
-
readiness: raw.readiness ?? false,
|
|
703
|
-
reportDataset: raw.reportDataset,
|
|
704
|
-
reportProject: raw.reportProject,
|
|
705
|
-
sanityDataset: raw.sanityDataset,
|
|
706
689
|
sanityDocument: raw.sanityDocument ?? [],
|
|
707
|
-
sanityDocuments: raw.sanityDocuments ?? [],
|
|
708
690
|
sanityPerspective: raw.sanityPerspective,
|
|
709
|
-
sanityProject: raw.sanityProject,
|
|
710
|
-
sanityStudioOrigin: raw.sanityStudioOrigin,
|
|
711
691
|
search: raw.search,
|
|
712
|
-
|
|
713
|
-
|
|
692
|
+
eval: raw.eval ?? true,
|
|
693
|
+
fetch: raw.fetch ?? true,
|
|
714
694
|
source: raw.source,
|
|
715
695
|
tag: raw.tag ?? [],
|
|
716
696
|
task: raw.task,
|
|
717
697
|
threshold: raw.threshold,
|
|
718
698
|
url: raw.url ?? [],
|
|
719
|
-
urls: raw.urls ?? [],
|
|
720
699
|
remote: raw.remote ?? false,
|
|
721
|
-
apiUrl: raw.apiUrl,
|
|
722
|
-
repoTasksPath: raw.repoTasksPath,
|
|
723
|
-
taskSource: raw.taskSource,
|
|
724
700
|
remoteCache: raw.remoteCache,
|
|
725
701
|
config: raw.config,
|
|
726
|
-
|
|
727
|
-
artifactsDir: raw.artifactsDir,
|
|
728
|
-
artifactsDryRun: raw.artifactsDryRun ?? false,
|
|
729
|
-
artifactsExclude: raw.artifactsExclude,
|
|
702
|
+
artifactsWrite: raw.artifactsWrite ?? true,
|
|
730
703
|
classification: raw.classification,
|
|
731
|
-
ownerTeam: raw.ownerTeam,
|
|
732
|
-
ownerIndividual: raw.ownerIndividual,
|
|
733
704
|
purpose: raw.purpose,
|
|
734
705
|
label: raw.label ?? [],
|
|
735
706
|
};
|
|
@@ -742,7 +713,6 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
|
|
|
742
713
|
compareThreshold: resolved.compareThreshold,
|
|
743
714
|
concurrency: resolved.concurrency,
|
|
744
715
|
debug: resolved.debug,
|
|
745
|
-
discoveryReportEnabled: resolved.discoveryReportEnabled,
|
|
746
716
|
dryRun: resolved.dryRun,
|
|
747
717
|
gapAnalysisEnabled: resolved.gapAnalysisEnabled,
|
|
748
718
|
graderReplications: resolved.graderReplications,
|
|
@@ -750,7 +720,6 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
|
|
|
750
720
|
variant: resolved.variant,
|
|
751
721
|
noCache: resolved.noCache,
|
|
752
722
|
publishEnabled: resolved.publishEnabled,
|
|
753
|
-
readinessEnabled: resolved.readinessEnabled,
|
|
754
723
|
skipEval: resolved.skipEval,
|
|
755
724
|
skipFetch: resolved.skipFetch,
|
|
756
725
|
source: resolved.source,
|
|
@@ -95,16 +95,16 @@ export function createGraderCommand() {
|
|
|
95
95
|
.command("validate")
|
|
96
96
|
.description("Validate grader accuracy against human reference grades")
|
|
97
97
|
.option("-g, --grader <model>", "Grader model to validate")
|
|
98
|
-
.option("-
|
|
98
|
+
.option("--mae-threshold <n>", "MAE threshold for pass/fail", parseFloat, 10)
|
|
99
99
|
.action(async (opts) => {
|
|
100
100
|
try {
|
|
101
101
|
const result = await runGraderValidate({
|
|
102
102
|
graderModel: opts.grader,
|
|
103
|
-
maeThreshold: opts.
|
|
103
|
+
maeThreshold: opts.maeThreshold,
|
|
104
104
|
rootDir: ROOT,
|
|
105
105
|
});
|
|
106
106
|
if (!result.passesThreshold) {
|
|
107
|
-
console.error(`\n ❌ VALIDATION FAILED: MAE ${result.overallMae} exceeds threshold ${opts.
|
|
107
|
+
console.error(`\n ❌ VALIDATION FAILED: MAE ${result.overallMae} exceeds threshold ${opts.maeThreshold}`);
|
|
108
108
|
process.exit(1);
|
|
109
109
|
}
|
|
110
110
|
}
|
package/dist/commands/init.d.ts
CHANGED
|
@@ -11,8 +11,8 @@
|
|
|
11
11
|
*
|
|
12
12
|
* Usage:
|
|
13
13
|
* ailf init # TypeScript output (default)
|
|
14
|
-
* ailf init --
|
|
15
|
-
* ailf init --
|
|
14
|
+
* ailf init --format yaml # YAML output
|
|
15
|
+
* ailf init --format json # JSON output
|
|
16
16
|
* ailf init --force # overwrite existing files
|
|
17
17
|
* ailf init --path ./my-dir # target a specific directory
|
|
18
18
|
*/
|
package/dist/commands/init.js
CHANGED
|
@@ -11,8 +11,8 @@
|
|
|
11
11
|
*
|
|
12
12
|
* Usage:
|
|
13
13
|
* ailf init # TypeScript output (default)
|
|
14
|
-
* ailf init --
|
|
15
|
-
* ailf init --
|
|
14
|
+
* ailf init --format yaml # YAML output
|
|
15
|
+
* ailf init --format json # JSON output
|
|
16
16
|
* ailf init --force # overwrite existing files
|
|
17
17
|
* ailf init --path ./my-dir # target a specific directory
|
|
18
18
|
*/
|
|
@@ -27,7 +27,7 @@ import { probeUserLocalAilf } from "../adapters/config-sources/ailf-resolver.js"
|
|
|
27
27
|
export function createInitCommand() {
|
|
28
28
|
return new Command("init")
|
|
29
29
|
.description("Initialize a directory for AI Literacy Framework evaluation")
|
|
30
|
-
.option("--
|
|
30
|
+
.option("-f, --format <fmt>", 'Output format for generated files: "ts" (default), "yaml", or "json"', "ts")
|
|
31
31
|
.option("--force", "Overwrite existing files", false)
|
|
32
32
|
.option("--path <dir>", "Target directory (default: current directory)", ".")
|
|
33
33
|
.option("--mode <mode>", "Scaffold for a specific mode: literacy, mcp-server, custom (default: all modes)")
|
|
@@ -63,15 +63,15 @@ function taskStemsForMode(mode) {
|
|
|
63
63
|
// ---------------------------------------------------------------------------
|
|
64
64
|
async function runInit(opts) {
|
|
65
65
|
const validFormats = new Set(["ts", "yaml", "json"]);
|
|
66
|
-
if (!validFormats.has(opts.
|
|
67
|
-
console.error(` ✗ Invalid output format "${opts.
|
|
66
|
+
if (!validFormats.has(opts.format)) {
|
|
67
|
+
console.error(` ✗ Invalid output format "${opts.format}". Valid options: ts, yaml, json`);
|
|
68
68
|
process.exitCode = 1;
|
|
69
69
|
return;
|
|
70
70
|
}
|
|
71
|
-
const format = opts.
|
|
71
|
+
const format = opts.format;
|
|
72
72
|
const force = opts.force;
|
|
73
73
|
if (format === "yaml") {
|
|
74
|
-
console.warn(" ⚠ --
|
|
74
|
+
console.warn(" ⚠ --format yaml is deprecated. TypeScript (default) is the\n" +
|
|
75
75
|
" recommended format — it provides full IDE autocomplete via defineTask().\n" +
|
|
76
76
|
" YAML output will be removed in a future release.\n");
|
|
77
77
|
}
|
|
@@ -285,10 +285,11 @@ async function runInit(opts) {
|
|
|
285
285
|
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
|
|
286
286
|
console.log();
|
|
287
287
|
console.log(" 💡 Or test a remote run against your repo tasks:");
|
|
288
|
-
console.log("
|
|
288
|
+
console.log(" # First, set `taskSource: { type: repo }` in .ailf/config.yaml");
|
|
289
|
+
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest run --remote --debug");
|
|
289
290
|
console.log();
|
|
290
291
|
console.log(" 💡 Or run locally against your repo tasks:");
|
|
291
|
-
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest
|
|
292
|
+
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest run --mode=literacy --variant=full --debug --explain -y");
|
|
292
293
|
console.log();
|
|
293
294
|
}
|
|
294
295
|
// ---------------------------------------------------------------------------
|