@sanity/ailf 3.8.0 → 3.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/config-sources/file-config-adapter.js +4 -5
- package/dist/adapters/task-sources/repo-schemas.d.ts +3 -3
- package/dist/cli-program.d.ts +39 -0
- package/dist/cli-program.js +137 -0
- package/dist/cli.d.ts +6 -0
- package/dist/cli.js +12 -122
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/package.json +5 -3
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509
|
@@ -47,7 +47,7 @@ export class FileConfigAdapter {
|
|
|
47
47
|
return this.validateAndMap(result.value, ext);
|
|
48
48
|
}
|
|
49
49
|
// YAML / JSON files — load via fs
|
|
50
|
-
const raw = readConfigFile(this.filePath);
|
|
50
|
+
const raw = await readConfigFile(this.filePath);
|
|
51
51
|
return this.validateAndMap(raw, ext);
|
|
52
52
|
}
|
|
53
53
|
/**
|
|
@@ -69,13 +69,12 @@ export class FileConfigAdapter {
|
|
|
69
69
|
// ---------------------------------------------------------------------------
|
|
70
70
|
// Helpers
|
|
71
71
|
// ---------------------------------------------------------------------------
|
|
72
|
-
function readConfigFile(filePath) {
|
|
72
|
+
async function readConfigFile(filePath) {
|
|
73
73
|
const content = readFileSync(filePath, "utf-8");
|
|
74
74
|
const ext = extname(filePath).toLowerCase();
|
|
75
75
|
if (ext === ".yaml" || ext === ".yml") {
|
|
76
|
-
// Dynamic import
|
|
77
|
-
|
|
78
|
-
const { parse } = require("yaml");
|
|
76
|
+
// Dynamic ESM import — only loaded when reading YAML configs.
|
|
77
|
+
const { parse } = await import("yaml");
|
|
79
78
|
return parse(content);
|
|
80
79
|
}
|
|
81
80
|
return JSON.parse(content);
|
|
@@ -147,8 +147,8 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
147
147
|
baseline: z.ZodOptional<z.ZodObject<{
|
|
148
148
|
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
149
149
|
rubric: z.ZodOptional<z.ZodEnum<{
|
|
150
|
-
full: "full";
|
|
151
150
|
abbreviated: "abbreviated";
|
|
151
|
+
full: "full";
|
|
152
152
|
none: "none";
|
|
153
153
|
}>>;
|
|
154
154
|
}, z.core.$strip>>;
|
|
@@ -773,8 +773,8 @@ export declare const ContentLakeAuthorableTaskSchema: z.ZodObject<{
|
|
|
773
773
|
baseline: z.ZodOptional<z.ZodObject<{
|
|
774
774
|
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
775
775
|
rubric: z.ZodOptional<z.ZodEnum<{
|
|
776
|
-
full: "full";
|
|
777
776
|
abbreviated: "abbreviated";
|
|
777
|
+
full: "full";
|
|
778
778
|
none: "none";
|
|
779
779
|
}>>;
|
|
780
780
|
}, z.core.$strip>>;
|
|
@@ -893,8 +893,8 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
893
893
|
baseline: z.ZodOptional<z.ZodObject<{
|
|
894
894
|
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
895
895
|
rubric: z.ZodOptional<z.ZodEnum<{
|
|
896
|
-
full: "full";
|
|
897
896
|
abbreviated: "abbreviated";
|
|
897
|
+
full: "full";
|
|
898
898
|
none: "none";
|
|
899
899
|
}>>;
|
|
900
900
|
}, z.core.$strip>>;
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* cli-program.ts — pure factory for the AILF Commander program.
|
|
3
|
+
*
|
|
4
|
+
* Splits the program construction out of cli.ts so the CLI is testable
|
|
5
|
+
* in-process. cli.ts owns bootstrap side effects (dotenv loading,
|
|
6
|
+
* retired-flag/env/cmd checks, AILF_LOG_LEVEL pre-scan, parseAsync); this
|
|
7
|
+
* module owns command wiring.
|
|
8
|
+
*
|
|
9
|
+
* The W0078 M4 black-box harness imports `buildCliProgram()` directly so
|
|
10
|
+
* tests can construct a fresh program, attach `exitOverride()`, capture
|
|
11
|
+
* stdout/stderr, and parse a synthetic argv — all without spawning a
|
|
12
|
+
* subprocess.
|
|
13
|
+
*
|
|
14
|
+
* @see packages/eval/src/__tests__/cli-harness/run-cli.ts
|
|
15
|
+
*/
|
|
16
|
+
import { Command } from "commander";
|
|
17
|
+
/**
|
|
18
|
+
* Options for `buildCliProgram`.
|
|
19
|
+
*/
|
|
20
|
+
export interface BuildCliProgramOptions {
|
|
21
|
+
/**
|
|
22
|
+
* Path to the eval package root (the directory containing package.json).
|
|
23
|
+
* Used to resolve the version string and as the root passed to the
|
|
24
|
+
* `--explain` handler.
|
|
25
|
+
*/
|
|
26
|
+
evalRoot: string;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Construct the Commander program with every subcommand registered.
|
|
30
|
+
*
|
|
31
|
+
* Pure factory — no I/O beyond reading package.json for the version, no
|
|
32
|
+
* `process.exit()`, no `process.argv` access. Tests can call this and
|
|
33
|
+
* attach `program.exitOverride()` before parsing to capture exit codes
|
|
34
|
+
* instead of terminating the process.
|
|
35
|
+
*
|
|
36
|
+
* Registration order determines group display order in `--help`. Commands
|
|
37
|
+
* within a group appear in the order they're added.
|
|
38
|
+
*/
|
|
39
|
+
export declare function buildCliProgram(opts: BuildCliProgramOptions): Command;
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* cli-program.ts — pure factory for the AILF Commander program.
|
|
3
|
+
*
|
|
4
|
+
* Splits the program construction out of cli.ts so the CLI is testable
|
|
5
|
+
* in-process. cli.ts owns bootstrap side effects (dotenv loading,
|
|
6
|
+
* retired-flag/env/cmd checks, AILF_LOG_LEVEL pre-scan, parseAsync); this
|
|
7
|
+
* module owns command wiring.
|
|
8
|
+
*
|
|
9
|
+
* The W0078 M4 black-box harness imports `buildCliProgram()` directly so
|
|
10
|
+
* tests can construct a fresh program, attach `exitOverride()`, capture
|
|
11
|
+
* stdout/stderr, and parse a synthetic argv — all without spawning a
|
|
12
|
+
* subprocess.
|
|
13
|
+
*
|
|
14
|
+
* @see packages/eval/src/__tests__/cli-harness/run-cli.ts
|
|
15
|
+
*/
|
|
16
|
+
import { Command } from "commander";
|
|
17
|
+
import { readFileSync } from "node:fs";
|
|
18
|
+
import { resolve } from "node:path";
|
|
19
|
+
import { CommandGroup, configureProgram } from "./commands/shared/help.js";
|
|
20
|
+
import { createAgentReportCommand } from "./commands/agent-report.js";
|
|
21
|
+
import { createBaselineCommand } from "./commands/baseline.js";
|
|
22
|
+
import { createCacheCommand } from "./commands/cache.js";
|
|
23
|
+
import { createCalculateScoresCommand } from "./commands/calculate-scores.js";
|
|
24
|
+
import { createCheckStalenessCommand } from "./commands/check-staleness.js";
|
|
25
|
+
import { createChronicFailuresCommand } from "./commands/chronic-failures.js";
|
|
26
|
+
import { createCompareCommand } from "./commands/compare.js";
|
|
27
|
+
import { createCompletionCommand } from "./commands/completion.js";
|
|
28
|
+
import { createCoverageAuditCommand } from "./commands/coverage-audit.js";
|
|
29
|
+
import { createDiscoveryReportCommand } from "./commands/discovery-report.js";
|
|
30
|
+
import { createEvalCommand } from "./commands/eval.js";
|
|
31
|
+
import { createFetchDocsCommand } from "./commands/fetch-docs.js";
|
|
32
|
+
import { createGenerateConfigsCommand } from "./commands/generate-configs.js";
|
|
33
|
+
import { createGraderCommand } from "./commands/grader/index.js";
|
|
34
|
+
import { createInitCommand } from "./commands/init.js";
|
|
35
|
+
import { createInteractiveCommand } from "./commands/interactive.js";
|
|
36
|
+
import { createLookupDocCommand } from "./commands/lookup-doc.js";
|
|
37
|
+
import { createMeasureRetrievalCommand } from "./commands/measure-retrieval.js";
|
|
38
|
+
import { createPrCommentCommand } from "./commands/pr-comment.js";
|
|
39
|
+
import { createPublishCommand } from "./commands/publish.js";
|
|
40
|
+
import { createReadinessReportCommand } from "./commands/readiness-report.js";
|
|
41
|
+
import { createRunCommand } from "./commands/run.js";
|
|
42
|
+
import { createRunsCommand } from "./commands/runs.js";
|
|
43
|
+
import { createValidateConfigCommand } from "./commands/validate.js";
|
|
44
|
+
import { createValidateTasksCommand } from "./commands/validate-tasks.js";
|
|
45
|
+
import { createWebhookServerCommand } from "./commands/webhook-server.js";
|
|
46
|
+
import { createWeeklyDigestCommand } from "./commands/weekly-digest.js";
|
|
47
|
+
/**
|
|
48
|
+
* Construct the Commander program with every subcommand registered.
|
|
49
|
+
*
|
|
50
|
+
* Pure factory — no I/O beyond reading package.json for the version, no
|
|
51
|
+
* `process.exit()`, no `process.argv` access. Tests can call this and
|
|
52
|
+
* attach `program.exitOverride()` before parsing to capture exit codes
|
|
53
|
+
* instead of terminating the process.
|
|
54
|
+
*
|
|
55
|
+
* Registration order determines group display order in `--help`. Commands
|
|
56
|
+
* within a group appear in the order they're added.
|
|
57
|
+
*/
|
|
58
|
+
export function buildCliProgram(opts) {
|
|
59
|
+
const { evalRoot } = opts;
|
|
60
|
+
const pkgPath = resolve(evalRoot, "package.json");
|
|
61
|
+
const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
|
|
62
|
+
const program = new Command()
|
|
63
|
+
.name("ailf")
|
|
64
|
+
.description("AI Literacy Framework — evaluate how well docs enable AI coding tools\n\nMeasure whether AI coding agents can find the right documentation\nand produce correct implementations of your product features.")
|
|
65
|
+
.version(pkg.version)
|
|
66
|
+
.option("-v, --verbose", "Increase log output")
|
|
67
|
+
.option("-q, --quiet", "Suppress non-error output")
|
|
68
|
+
.option("--dotenv <path>", "Override default .env file path")
|
|
69
|
+
.option("--explain", "Show execution plan without running")
|
|
70
|
+
.option("--format <fmt>", "Output format for --explain (console, json)", "console")
|
|
71
|
+
.option("-y, --yes", "With --explain: show plan then prompt to confirm execution");
|
|
72
|
+
configureProgram(program);
|
|
73
|
+
// Global --explain hook — intercepts any command before execution
|
|
74
|
+
program.hook("preAction", async (thisCommand, actionCommand) => {
|
|
75
|
+
const globalOpts = thisCommand.opts();
|
|
76
|
+
if (!globalOpts.explain)
|
|
77
|
+
return;
|
|
78
|
+
const { handleExplain } = await import("./commands/explain-handler.js");
|
|
79
|
+
try {
|
|
80
|
+
await handleExplain(actionCommand, globalOpts.yes ?? false, evalRoot);
|
|
81
|
+
process.exit(0);
|
|
82
|
+
}
|
|
83
|
+
catch (err) {
|
|
84
|
+
// Sentinel from --yes confirmation: user wants to proceed
|
|
85
|
+
if (err !== null &&
|
|
86
|
+
typeof err === "object" &&
|
|
87
|
+
"__proceedArgv" in err) {
|
|
88
|
+
const filteredArgv = err.__proceedArgv;
|
|
89
|
+
console.log("\n ▸ Proceeding with execution…\n");
|
|
90
|
+
await program.parseAsync(filteredArgv);
|
|
91
|
+
return;
|
|
92
|
+
}
|
|
93
|
+
throw err;
|
|
94
|
+
}
|
|
95
|
+
});
|
|
96
|
+
// ── Core Workflow ──────────────────────────────────────────────────────
|
|
97
|
+
program.addCommand(createRunCommand().helpGroup(CommandGroup.CoreWorkflow));
|
|
98
|
+
program.addCommand(createCompareCommand().helpGroup(CommandGroup.CoreWorkflow));
|
|
99
|
+
program.addCommand(createBaselineCommand().helpGroup(CommandGroup.CoreWorkflow));
|
|
100
|
+
program.addCommand(createPublishCommand().helpGroup(CommandGroup.CoreWorkflow));
|
|
101
|
+
program.addCommand(createRunsCommand().helpGroup(CommandGroup.CoreWorkflow));
|
|
102
|
+
// ── Analysis & Reports ────────────────────────────────────────────────
|
|
103
|
+
const reportCommand = new Command("report")
|
|
104
|
+
.description("Generate analysis and reporting outputs from evaluation runs")
|
|
105
|
+
.addCommand(createReadinessReportCommand())
|
|
106
|
+
.addCommand(createChronicFailuresCommand())
|
|
107
|
+
.addCommand(createCoverageAuditCommand())
|
|
108
|
+
.addCommand(createDiscoveryReportCommand())
|
|
109
|
+
.addCommand(createAgentReportCommand())
|
|
110
|
+
.addCommand(createWeeklyDigestCommand())
|
|
111
|
+
.addCommand(createCheckStalenessCommand());
|
|
112
|
+
program.addCommand(reportCommand.helpGroup(CommandGroup.AnalysisReports));
|
|
113
|
+
// ── Grader Reliability ────────────────────────────────────────────────
|
|
114
|
+
program.addCommand(createGraderCommand().helpGroup(CommandGroup.GraderReliability));
|
|
115
|
+
// ── Setup & Configuration ─────────────────────────────────────────────
|
|
116
|
+
program.addCommand(createInitCommand().helpGroup(CommandGroup.SetupConfig));
|
|
117
|
+
const validateCommand = new Command("validate")
|
|
118
|
+
.description("Validate AILF configuration and task files")
|
|
119
|
+
.addCommand(createValidateConfigCommand())
|
|
120
|
+
.addCommand(createValidateTasksCommand());
|
|
121
|
+
program.addCommand(validateCommand.helpGroup(CommandGroup.SetupConfig));
|
|
122
|
+
program.addCommand(createFetchDocsCommand().helpGroup(CommandGroup.SetupConfig));
|
|
123
|
+
program.addCommand(createCacheCommand().helpGroup(CommandGroup.SetupConfig));
|
|
124
|
+
// ── Pipeline Internals ────────────────────────────────────────────────
|
|
125
|
+
program.addCommand(createEvalCommand().helpGroup(CommandGroup.PipelineInternals));
|
|
126
|
+
program.addCommand(createCalculateScoresCommand().helpGroup(CommandGroup.PipelineInternals));
|
|
127
|
+
program.addCommand(createPrCommentCommand().helpGroup(CommandGroup.PipelineInternals));
|
|
128
|
+
program.addCommand(createGenerateConfigsCommand().helpGroup(CommandGroup.PipelineInternals));
|
|
129
|
+
program.addCommand(createMeasureRetrievalCommand().helpGroup(CommandGroup.PipelineInternals));
|
|
130
|
+
program.addCommand(createLookupDocCommand().helpGroup(CommandGroup.PipelineInternals));
|
|
131
|
+
program.addCommand(createWebhookServerCommand().helpGroup(CommandGroup.PipelineInternals));
|
|
132
|
+
// ── Developer Tools ───────────────────────────────────────────────────
|
|
133
|
+
program.addCommand(createInteractiveCommand().helpGroup(CommandGroup.DeveloperTools));
|
|
134
|
+
// Shell completion — must be registered last (needs full program tree)
|
|
135
|
+
program.addCommand(createCompletionCommand(program).helpGroup(CommandGroup.DeveloperTools));
|
|
136
|
+
return program;
|
|
137
|
+
}
|
package/dist/cli.d.ts
CHANGED
|
@@ -25,5 +25,11 @@
|
|
|
25
25
|
*
|
|
26
26
|
* Dev mode (without building):
|
|
27
27
|
* tsx src/cli.ts run --debug
|
|
28
|
+
*
|
|
29
|
+
* Module split: this file owns *bootstrap side effects* (dotenv,
|
|
30
|
+
* retired-flag/env/cmd checks, AILF_LOG_LEVEL pre-scan, parseAsync).
|
|
31
|
+
* The Commander wiring lives in ./cli-program.ts so the W0078 M4 black-box
|
|
32
|
+
* harness can build the program in-process without firing those side
|
|
33
|
+
* effects.
|
|
28
34
|
*/
|
|
29
35
|
export {};
|
package/dist/cli.js
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
/* oxlint-disable import/first -- imports are intentionally interleaved with
|
|
3
|
-
command registration for readability and lazy loading */
|
|
4
2
|
/**
|
|
5
3
|
* cli.ts — AILF CLI entry point.
|
|
6
4
|
*
|
|
@@ -27,15 +25,20 @@
|
|
|
27
25
|
*
|
|
28
26
|
* Dev mode (without building):
|
|
29
27
|
* tsx src/cli.ts run --debug
|
|
28
|
+
*
|
|
29
|
+
* Module split: this file owns *bootstrap side effects* (dotenv,
|
|
30
|
+
* retired-flag/env/cmd checks, AILF_LOG_LEVEL pre-scan, parseAsync).
|
|
31
|
+
* The Commander wiring lives in ./cli-program.ts so the W0078 M4 black-box
|
|
32
|
+
* harness can build the program in-process without firing those side
|
|
33
|
+
* effects.
|
|
30
34
|
*/
|
|
31
35
|
import { config as dotenvConfig } from "dotenv";
|
|
32
|
-
import { existsSync, readFileSync } from "fs";
|
|
36
|
+
import { existsSync } from "fs";
|
|
33
37
|
import { dirname, resolve } from "path";
|
|
34
38
|
import { fileURLToPath } from "url";
|
|
39
|
+
import { buildCliProgram } from "./cli-program.js";
|
|
35
40
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
36
41
|
const ROOT = resolve(__dirname, "..");
|
|
37
|
-
/** Path to the eval package root (packages/eval). Used by --explain. */
|
|
38
|
-
const EVAL_ROOT = ROOT;
|
|
39
42
|
// ---------------------------------------------------------------------------
|
|
40
43
|
// Load .env — must happen before Commander parses so that .env()
|
|
41
44
|
// fallbacks resolve correctly.
|
|
@@ -220,127 +223,14 @@ if (retiredFlag || retiredCommand || retiredEnv) {
|
|
|
220
223
|
process.exit(2);
|
|
221
224
|
}
|
|
222
225
|
// ---------------------------------------------------------------------------
|
|
223
|
-
// Build CLI program
|
|
224
|
-
// ---------------------------------------------------------------------------
|
|
225
|
-
import { Command } from "commander";
|
|
226
|
-
import { CommandGroup, configureProgram } from "./commands/shared/help.js";
|
|
227
|
-
// Read version from package.json
|
|
228
|
-
const pkgPath = resolve(ROOT, "package.json");
|
|
229
|
-
const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
|
|
230
|
-
const program = new Command()
|
|
231
|
-
.name("ailf")
|
|
232
|
-
.description("AI Literacy Framework — evaluate how well docs enable AI coding tools\n\nMeasure whether AI coding agents can find the right documentation\nand produce correct implementations of your product features.")
|
|
233
|
-
.version(pkg.version)
|
|
234
|
-
.option("-v, --verbose", "Increase log output")
|
|
235
|
-
.option("-q, --quiet", "Suppress non-error output")
|
|
236
|
-
.option("--dotenv <path>", "Override default .env file path")
|
|
237
|
-
.option("--explain", "Show execution plan without running")
|
|
238
|
-
.option("--format <fmt>", "Output format for --explain (console, json)", "console")
|
|
239
|
-
.option("-y, --yes", "With --explain: show plan then prompt to confirm execution");
|
|
240
|
-
configureProgram(program);
|
|
241
|
-
// ---------------------------------------------------------------------------
|
|
242
|
-
// Global --explain hook — intercepts any command before execution
|
|
226
|
+
// Build CLI program (delegates Commander wiring to ./cli-program.ts)
|
|
243
227
|
// ---------------------------------------------------------------------------
|
|
244
|
-
program.hook("preAction", async (thisCommand, actionCommand) => {
|
|
245
|
-
const globalOpts = thisCommand.opts();
|
|
246
|
-
if (!globalOpts.explain)
|
|
247
|
-
return;
|
|
248
|
-
const { handleExplain } = await import("./commands/explain-handler.js");
|
|
249
|
-
try {
|
|
250
|
-
await handleExplain(actionCommand, globalOpts.yes ?? false, EVAL_ROOT);
|
|
251
|
-
process.exit(0);
|
|
252
|
-
}
|
|
253
|
-
catch (err) {
|
|
254
|
-
// Sentinel from --yes confirmation: user wants to proceed
|
|
255
|
-
if (err !== null &&
|
|
256
|
-
typeof err === "object" &&
|
|
257
|
-
"__proceedArgv" in err) {
|
|
258
|
-
const filteredArgv = err.__proceedArgv;
|
|
259
|
-
console.log("\n ▸ Proceeding with execution…\n");
|
|
260
|
-
await program.parseAsync(filteredArgv);
|
|
261
|
-
return;
|
|
262
|
-
}
|
|
263
|
-
throw err;
|
|
264
|
-
}
|
|
265
|
-
});
|
|
228
|
+
const program = buildCliProgram({ evalRoot: ROOT });
|
|
266
229
|
// ---------------------------------------------------------------------------
|
|
267
|
-
//
|
|
230
|
+
// Parse and run — default to showing help when no arguments given.
|
|
268
231
|
//
|
|
269
|
-
// Registration order determines group display order in --help.
|
|
270
|
-
// Within each group, commands appear in the order they are added.
|
|
271
|
-
// ---------------------------------------------------------------------------
|
|
272
|
-
// ── Core Workflow ──────────────────────────────────────────────────────
|
|
273
|
-
import { createRunCommand } from "./commands/run.js";
|
|
274
|
-
program.addCommand(createRunCommand().helpGroup(CommandGroup.CoreWorkflow));
|
|
275
|
-
import { createCompareCommand } from "./commands/compare.js";
|
|
276
|
-
program.addCommand(createCompareCommand().helpGroup(CommandGroup.CoreWorkflow));
|
|
277
|
-
import { createBaselineCommand } from "./commands/baseline.js";
|
|
278
|
-
program.addCommand(createBaselineCommand().helpGroup(CommandGroup.CoreWorkflow));
|
|
279
|
-
import { createPublishCommand } from "./commands/publish.js";
|
|
280
|
-
program.addCommand(createPublishCommand().helpGroup(CommandGroup.CoreWorkflow));
|
|
281
|
-
import { createRunsCommand } from "./commands/runs.js";
|
|
282
|
-
program.addCommand(createRunsCommand().helpGroup(CommandGroup.CoreWorkflow));
|
|
283
|
-
// ── Analysis & Reports ────────────────────────────────────────────────
|
|
284
|
-
import { createReadinessReportCommand } from "./commands/readiness-report.js";
|
|
285
|
-
import { createChronicFailuresCommand } from "./commands/chronic-failures.js";
|
|
286
|
-
import { createCoverageAuditCommand } from "./commands/coverage-audit.js";
|
|
287
|
-
import { createDiscoveryReportCommand } from "./commands/discovery-report.js";
|
|
288
|
-
import { createAgentReportCommand } from "./commands/agent-report.js";
|
|
289
|
-
import { createWeeklyDigestCommand } from "./commands/weekly-digest.js";
|
|
290
|
-
import { createCheckStalenessCommand } from "./commands/check-staleness.js";
|
|
291
|
-
const reportCommand = new Command("report")
|
|
292
|
-
.description("Generate analysis and reporting outputs from evaluation runs")
|
|
293
|
-
.addCommand(createReadinessReportCommand())
|
|
294
|
-
.addCommand(createChronicFailuresCommand())
|
|
295
|
-
.addCommand(createCoverageAuditCommand())
|
|
296
|
-
.addCommand(createDiscoveryReportCommand())
|
|
297
|
-
.addCommand(createAgentReportCommand())
|
|
298
|
-
.addCommand(createWeeklyDigestCommand())
|
|
299
|
-
.addCommand(createCheckStalenessCommand());
|
|
300
|
-
program.addCommand(reportCommand.helpGroup(CommandGroup.AnalysisReports));
|
|
301
|
-
// ── Grader Reliability ────────────────────────────────────────────────
|
|
302
|
-
import { createGraderCommand } from "./commands/grader/index.js";
|
|
303
|
-
program.addCommand(createGraderCommand().helpGroup(CommandGroup.GraderReliability));
|
|
304
|
-
// ── Setup & Configuration ─────────────────────────────────────────────
|
|
305
|
-
import { createInitCommand } from "./commands/init.js";
|
|
306
|
-
program.addCommand(createInitCommand().helpGroup(CommandGroup.SetupConfig));
|
|
307
|
-
import { createValidateConfigCommand } from "./commands/validate.js";
|
|
308
|
-
import { createValidateTasksCommand } from "./commands/validate-tasks.js";
|
|
309
|
-
const validateCommand = new Command("validate")
|
|
310
|
-
.description("Validate AILF configuration and task files")
|
|
311
|
-
.addCommand(createValidateConfigCommand())
|
|
312
|
-
.addCommand(createValidateTasksCommand());
|
|
313
|
-
program.addCommand(validateCommand.helpGroup(CommandGroup.SetupConfig));
|
|
314
|
-
import { createFetchDocsCommand } from "./commands/fetch-docs.js";
|
|
315
|
-
program.addCommand(createFetchDocsCommand().helpGroup(CommandGroup.SetupConfig));
|
|
316
|
-
import { createCacheCommand } from "./commands/cache.js";
|
|
317
|
-
program.addCommand(createCacheCommand().helpGroup(CommandGroup.SetupConfig));
|
|
318
|
-
// ── Pipeline Internals ────────────────────────────────────────────────
|
|
319
|
-
import { createEvalCommand } from "./commands/eval.js";
|
|
320
|
-
program.addCommand(createEvalCommand().helpGroup(CommandGroup.PipelineInternals));
|
|
321
|
-
import { createCalculateScoresCommand } from "./commands/calculate-scores.js";
|
|
322
|
-
program.addCommand(createCalculateScoresCommand().helpGroup(CommandGroup.PipelineInternals));
|
|
323
|
-
import { createPrCommentCommand } from "./commands/pr-comment.js";
|
|
324
|
-
program.addCommand(createPrCommentCommand().helpGroup(CommandGroup.PipelineInternals));
|
|
325
|
-
import { createGenerateConfigsCommand } from "./commands/generate-configs.js";
|
|
326
|
-
program.addCommand(createGenerateConfigsCommand().helpGroup(CommandGroup.PipelineInternals));
|
|
327
|
-
import { createMeasureRetrievalCommand } from "./commands/measure-retrieval.js";
|
|
328
|
-
program.addCommand(createMeasureRetrievalCommand().helpGroup(CommandGroup.PipelineInternals));
|
|
329
|
-
import { createLookupDocCommand } from "./commands/lookup-doc.js";
|
|
330
|
-
program.addCommand(createLookupDocCommand().helpGroup(CommandGroup.PipelineInternals));
|
|
331
|
-
import { createWebhookServerCommand } from "./commands/webhook-server.js";
|
|
332
|
-
program.addCommand(createWebhookServerCommand().helpGroup(CommandGroup.PipelineInternals));
|
|
333
|
-
// ── Developer Tools ───────────────────────────────────────────────────
|
|
334
|
-
import { createInteractiveCommand } from "./commands/interactive.js";
|
|
335
|
-
program.addCommand(createInteractiveCommand().helpGroup(CommandGroup.DeveloperTools));
|
|
336
|
-
// Shell completion — must be registered last (needs full program tree)
|
|
337
|
-
import { createCompletionCommand } from "./commands/completion.js";
|
|
338
|
-
program.addCommand(createCompletionCommand(program).helpGroup(CommandGroup.DeveloperTools));
|
|
339
|
-
// ---------------------------------------------------------------------------
|
|
340
|
-
// Parse and run — default to showing help when no arguments given
|
|
341
|
-
// ---------------------------------------------------------------------------
|
|
342
|
-
// If no command is specified (just `ailf`), show help.
|
|
343
232
|
// The interactive wizard is still available via `ailf interactive`.
|
|
233
|
+
// ---------------------------------------------------------------------------
|
|
344
234
|
if (process.argv.length <= 2) {
|
|
345
235
|
program.outputHelp();
|
|
346
236
|
}
|
|
@@ -107,7 +107,7 @@ export declare function buildMirrorDocument(task: LiteracyTaskDefinition, opts:
|
|
|
107
107
|
slugToDocId: Map<string, string>;
|
|
108
108
|
}): {
|
|
109
109
|
baseline?: {
|
|
110
|
-
rubric?: "full" | "abbreviated" | "none" | undefined;
|
|
110
|
+
rubric?: "abbreviated" | "full" | "none" | undefined;
|
|
111
111
|
enabled?: boolean | undefined;
|
|
112
112
|
} | undefined;
|
|
113
113
|
_id: string;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sanity/ailf",
|
|
3
|
-
"version": "3.8.0",
|
|
3
|
+
"version": "3.8.1",
|
|
4
4
|
"private": false,
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -50,6 +50,7 @@
|
|
|
50
50
|
"@anthropic-ai/claude-agent-sdk": "^0.2.105",
|
|
51
51
|
"@types/js-yaml": "^4.0.9",
|
|
52
52
|
"@types/node": "^22.13.1",
|
|
53
|
+
"nock": "^14.0.13",
|
|
53
54
|
"tsx": "^4.19.2",
|
|
54
55
|
"typescript": "^5.7.3",
|
|
55
56
|
"@sanity/ailf-core": "0.1.0",
|
|
@@ -73,9 +74,10 @@
|
|
|
73
74
|
"cli": "tsx src/cli.ts",
|
|
74
75
|
"pipeline": "tsx src/cli.ts pipeline",
|
|
75
76
|
"validate": "tsx src/cli.ts validate config",
|
|
76
|
-
"test": "tsx --test src/__tests__/*.test.ts",
|
|
77
|
+
"test": "tsx --test src/__tests__/*.test.ts src/adapters/**/__tests__/*.adapter.test.ts",
|
|
77
78
|
"test:e2e": "AILF_E2E=1 tsx --test src/__tests__/e2e/*.e2e.test.ts",
|
|
78
|
-
"test:
|
|
79
|
+
"test:e2e:adapters": "AILF_E2E=1 tsx --test src/adapters/**/__tests__/*.adapter.test.ts",
|
|
80
|
+
"test:all": "AILF_E2E=1 tsx --test src/__tests__/*.test.ts src/pipeline/compiler/__tests__/*.test.ts src/__tests__/e2e/*.e2e.test.ts src/adapters/**/__tests__/*.adapter.test.ts",
|
|
79
81
|
"pr-comment": "tsx src/cli.ts pr-comment",
|
|
80
82
|
"coverage-audit": "tsx src/cli.ts report coverage",
|
|
81
83
|
"readiness-report": "tsx src/cli.ts report readiness",
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* agent-harness-handler.test.ts — Tests for agent harness mode compilation.
|
|
3
|
-
*
|
|
4
|
-
* Tests validation, provider assembly, tool permission resolution,
|
|
5
|
-
* assertion mapping, sandbox config, lifecycle extensions, and
|
|
6
|
-
* end-to-end compilation of example tasks.
|
|
7
|
-
*
|
|
8
|
-
* Run: npx tsx --test src/pipeline/compiler/__tests__/agent-harness-handler.test.ts
|
|
9
|
-
*/
|
|
10
|
-
export {};
|