@sanity/ailf 3.7.0 → 3.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
- package/config/thresholds.ts +3 -3
- package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
- package/dist/_vendor/ailf-core/examples/index.js +2 -2
- package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
- package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
- package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
- package/dist/_vendor/ailf-shared/run-classification.js +1 -1
- package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
- package/dist/adapters/api-client/build-request.d.ts +0 -2
- package/dist/adapters/api-client/build-request.js +2 -6
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
- package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
- package/dist/adapters/config-sources/file-config-adapter.js +42 -17
- package/dist/adapters/task-sources/repo-schemas.d.ts +41 -3
- package/dist/adapters/task-sources/repo-schemas.js +127 -0
- package/dist/cli-program.d.ts +39 -0
- package/dist/cli-program.js +137 -0
- package/dist/cli.d.ts +8 -2
- package/dist/cli.js +128 -142
- package/dist/commands/agent-report.js +1 -1
- package/dist/commands/calculate-scores.js +0 -2
- package/dist/commands/check-staleness.js +1 -1
- package/dist/commands/chronic-failures.js +4 -4
- package/dist/commands/coverage-audit.js +6 -7
- package/dist/commands/discovery-report.js +16 -4
- package/dist/commands/eval.d.ts +1 -1
- package/dist/commands/eval.js +1 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +13 -44
- package/dist/commands/fetch-docs.js +0 -2
- package/dist/commands/generate-configs.js +0 -2
- package/dist/commands/grader/index.js +3 -3
- package/dist/commands/init.d.ts +2 -2
- package/dist/commands/init.js +10 -9
- package/dist/commands/interactive.d.ts +1 -1
- package/dist/commands/interactive.js +8 -8
- package/dist/commands/pipeline-action.d.ts +1 -3
- package/dist/commands/pipeline-action.js +174 -140
- package/dist/commands/pr-comment.js +1 -3
- package/dist/commands/publish.d.ts +1 -1
- package/dist/commands/publish.js +2 -4
- package/dist/commands/readiness-report.js +17 -8
- package/dist/commands/remote-pipeline.d.ts +1 -1
- package/dist/commands/remote-pipeline.js +1 -3
- package/dist/commands/run.d.ts +64 -0
- package/dist/commands/{pipeline.js → run.js} +19 -30
- package/dist/commands/shared/help.js +4 -4
- package/dist/commands/shared/options.d.ts +29 -3
- package/dist/commands/shared/options.js +37 -13
- package/dist/commands/validate-tasks.js +1 -1
- package/dist/commands/validate.d.ts +1 -1
- package/dist/commands/validate.js +2 -2
- package/dist/commands/weekly-digest.js +3 -3
- package/dist/config/thresholds.ts +3 -3
- package/dist/orchestration/build-app-context.js +0 -2
- package/dist/orchestration/build-step-sequence.js +1 -11
- package/dist/orchestration/steps/fetch-docs-step.js +1 -1
- package/dist/orchestration/steps/index.d.ts +0 -2
- package/dist/orchestration/steps/index.js +0 -2
- package/dist/orchestration/steps/run-eval-step.js +1 -1
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/map-request-to-config.js +0 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/dist/pipeline/plan.d.ts +2 -4
- package/dist/pipeline/plan.js +4 -32
- package/dist/pipeline/run-context.d.ts +1 -1
- package/dist/pipeline/run-context.js +4 -4
- package/dist/pipeline/validate.d.ts +1 -1
- package/dist/pipeline/validate.js +1 -1
- package/package.json +11 -9
- package/dist/commands/pipeline.d.ts +0 -77
- package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
- package/dist/orchestration/steps/discovery-report-step.js +0 -62
- package/dist/orchestration/steps/readiness-step.d.ts +0 -13
- package/dist/orchestration/steps/readiness-step.js +0 -98
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* FileConfigAdapter — resolves pipeline config from a local config file.
|
|
3
3
|
*
|
|
4
|
-
* Enables `ailf
|
|
4
|
+
* Enables `ailf run --config <path>` to load all pipeline options
|
|
5
5
|
* from a file instead of CLI flags. Supports multiple formats in
|
|
6
6
|
* priority order:
|
|
7
7
|
*
|
|
@@ -47,7 +47,7 @@ export class FileConfigAdapter {
|
|
|
47
47
|
return this.validateAndMap(result.value, ext);
|
|
48
48
|
}
|
|
49
49
|
// YAML / JSON files — load via fs
|
|
50
|
-
const raw = readConfigFile(this.filePath);
|
|
50
|
+
const raw = await readConfigFile(this.filePath);
|
|
51
51
|
return this.validateAndMap(raw, ext);
|
|
52
52
|
}
|
|
53
53
|
/**
|
|
@@ -69,13 +69,12 @@ export class FileConfigAdapter {
|
|
|
69
69
|
// ---------------------------------------------------------------------------
|
|
70
70
|
// Helpers
|
|
71
71
|
// ---------------------------------------------------------------------------
|
|
72
|
-
function readConfigFile(filePath) {
|
|
72
|
+
async function readConfigFile(filePath) {
|
|
73
73
|
const content = readFileSync(filePath, "utf-8");
|
|
74
74
|
const ext = extname(filePath).toLowerCase();
|
|
75
75
|
if (ext === ".yaml" || ext === ".yml") {
|
|
76
|
-
// Dynamic import
|
|
77
|
-
|
|
78
|
-
const { parse } = require("yaml");
|
|
76
|
+
// Dynamic ESM import — only loaded when reading YAML configs.
|
|
77
|
+
const { parse } = await import("yaml");
|
|
79
78
|
return parse(content);
|
|
80
79
|
}
|
|
81
80
|
return JSON.parse(content);
|
|
@@ -91,9 +90,16 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
|
|
|
91
90
|
}
|
|
92
91
|
// Normalize legacy mode names (e.g., "full" → literacy + variant)
|
|
93
92
|
const normalized = normalizeMode(config.mode ?? "literacy");
|
|
93
|
+
// Output directory (W0077 Phase 6c) — resolve `output.dir` relative to the
|
|
94
|
+
// rootDir (the caller's workspace, set by the FileConfigAdapter caller).
|
|
95
|
+
// When unset, fall back to <rootDir>/.ailf/results/latest/ to mirror the
|
|
96
|
+
// CLI's default. This matches `resolveOutputDir` for the auto-load path.
|
|
97
|
+
const outputDir = config.output?.dir
|
|
98
|
+
? resolve(rootDir, config.output.dir)
|
|
99
|
+
: resolve(rootDir, ".ailf", "results", "latest");
|
|
94
100
|
return {
|
|
95
101
|
rootDir,
|
|
96
|
-
outputDir
|
|
102
|
+
outputDir,
|
|
97
103
|
mode: normalized.mode,
|
|
98
104
|
variant: normalized.variant,
|
|
99
105
|
noAutoScope: config.noAutoScope ?? false,
|
|
@@ -107,21 +113,40 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
|
|
|
107
113
|
compareEnabled: config.compare ?? false,
|
|
108
114
|
compareThreshold: config.compareThreshold,
|
|
109
115
|
compareBaseline: config.compareBaseline,
|
|
110
|
-
gapAnalysisEnabled: config.gapAnalysis ?? true,
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
116
|
+
gapAnalysisEnabled: config.execution?.gapAnalysis ?? true,
|
|
117
|
+
// W0077 Phase 4 — `publish` is now a policy object. Map the auto value
|
|
118
|
+
// directly to a boolean for the file-config path; the runtime
|
|
119
|
+
// smart-default logic in pipeline-action.ts isn't relevant here because
|
|
120
|
+
// the user has explicitly handed us a config file.
|
|
121
|
+
publishEnabled: config.publish?.auto === "never"
|
|
122
|
+
? false
|
|
123
|
+
: config.publish?.auto !== undefined,
|
|
124
|
+
publishTag: config.publish?.tag,
|
|
115
125
|
noCache: config.noCache ?? false,
|
|
116
126
|
noRemoteCache: config.noRemoteCache ?? false,
|
|
117
|
-
graderReplications: config.graderReplications,
|
|
127
|
+
graderReplications: config.execution?.graderReplications,
|
|
118
128
|
urls: config.urls,
|
|
119
|
-
headers: config.headers,
|
|
120
|
-
allowedOrigins: config.allowedOrigins,
|
|
129
|
+
headers: config.agentic?.headers,
|
|
130
|
+
allowedOrigins: config.agentic?.allowedOrigins,
|
|
121
131
|
searchMode: config.searchMode ?? "open",
|
|
122
|
-
concurrency: config.concurrency,
|
|
132
|
+
concurrency: config.execution?.concurrency,
|
|
123
133
|
remote: false,
|
|
124
|
-
apiUrl: "https://ailf-api.sanity.build",
|
|
134
|
+
apiUrl: config.execution?.apiUrl ?? "https://ailf-api.sanity.build",
|
|
135
|
+
// W0077 Phase 6g — artifact writer settings. `enabled: false` flips
|
|
136
|
+
// `artifactsDisabled` so composition-root selects the NoOp writer.
|
|
137
|
+
artifactsDisabled: config.artifacts?.enabled === false ? true : undefined,
|
|
138
|
+
artifactsDir: config.artifacts?.dir
|
|
139
|
+
? resolve(rootDir, config.artifacts.dir)
|
|
140
|
+
: undefined,
|
|
141
|
+
artifactsExclude: config.artifacts?.exclude,
|
|
142
|
+
// W0077 Phase 6h — task-source selection. Default is content-lake
|
|
143
|
+
// (signaled by `taskSourceType` undefined); when `repo`, the
|
|
144
|
+
// composition-root resolves `repoTasksPath` (defaulting to
|
|
145
|
+
// `<rootDir>/.ailf/tasks/` when unset).
|
|
146
|
+
taskSourceType: config.taskSource?.type === "repo" ? "repo" : undefined,
|
|
147
|
+
repoTasksPath: config.taskSource?.repoTasksPath
|
|
148
|
+
? resolve(rootDir, config.taskSource.repoTasksPath)
|
|
149
|
+
: undefined,
|
|
125
150
|
presets: config.presets,
|
|
126
151
|
};
|
|
127
152
|
}
|
|
@@ -147,8 +147,8 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
147
147
|
baseline: z.ZodOptional<z.ZodObject<{
|
|
148
148
|
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
149
149
|
rubric: z.ZodOptional<z.ZodEnum<{
|
|
150
|
-
full: "full";
|
|
151
150
|
abbreviated: "abbreviated";
|
|
151
|
+
full: "full";
|
|
152
152
|
none: "none";
|
|
153
153
|
}>>;
|
|
154
154
|
}, z.core.$strip>>;
|
|
@@ -773,8 +773,8 @@ export declare const ContentLakeAuthorableTaskSchema: z.ZodObject<{
|
|
|
773
773
|
baseline: z.ZodOptional<z.ZodObject<{
|
|
774
774
|
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
775
775
|
rubric: z.ZodOptional<z.ZodEnum<{
|
|
776
|
-
full: "full";
|
|
777
776
|
abbreviated: "abbreviated";
|
|
777
|
+
full: "full";
|
|
778
778
|
none: "none";
|
|
779
779
|
}>>;
|
|
780
780
|
}, z.core.$strip>>;
|
|
@@ -893,8 +893,8 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
|
|
|
893
893
|
baseline: z.ZodOptional<z.ZodObject<{
|
|
894
894
|
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
895
895
|
rubric: z.ZodOptional<z.ZodEnum<{
|
|
896
|
-
full: "full";
|
|
897
896
|
abbreviated: "abbreviated";
|
|
897
|
+
full: "full";
|
|
898
898
|
none: "none";
|
|
899
899
|
}>>;
|
|
900
900
|
}, z.core.$strip>>;
|
|
@@ -1434,11 +1434,49 @@ export declare const RepoConfigSchema: z.ZodObject<{
|
|
|
1434
1434
|
projectId: z.ZodOptional<z.ZodString>;
|
|
1435
1435
|
dataset: z.ZodOptional<z.ZodString>;
|
|
1436
1436
|
baseUrl: z.ZodOptional<z.ZodString>;
|
|
1437
|
+
studioOrigin: z.ZodOptional<z.ZodString>;
|
|
1437
1438
|
}, z.core.$strip>>;
|
|
1438
1439
|
reportStore: z.ZodOptional<z.ZodObject<{
|
|
1439
1440
|
projectId: z.ZodString;
|
|
1440
1441
|
dataset: z.ZodString;
|
|
1441
1442
|
}, z.core.$strip>>;
|
|
1443
|
+
publish: z.ZodOptional<z.ZodObject<{
|
|
1444
|
+
auto: z.ZodOptional<z.ZodEnum<{
|
|
1445
|
+
never: "never";
|
|
1446
|
+
always: "always";
|
|
1447
|
+
"full-runs": "full-runs";
|
|
1448
|
+
}>>;
|
|
1449
|
+
tag: z.ZodOptional<z.ZodString>;
|
|
1450
|
+
}, z.core.$strip>>;
|
|
1451
|
+
execution: z.ZodOptional<z.ZodObject<{
|
|
1452
|
+
concurrency: z.ZodOptional<z.ZodNumber>;
|
|
1453
|
+
graderReplications: z.ZodOptional<z.ZodNumber>;
|
|
1454
|
+
gapAnalysis: z.ZodOptional<z.ZodBoolean>;
|
|
1455
|
+
apiUrl: z.ZodOptional<z.ZodString>;
|
|
1456
|
+
}, z.core.$strip>>;
|
|
1457
|
+
output: z.ZodOptional<z.ZodObject<{
|
|
1458
|
+
dir: z.ZodOptional<z.ZodString>;
|
|
1459
|
+
}, z.core.$strip>>;
|
|
1460
|
+
owner: z.ZodOptional<z.ZodObject<{
|
|
1461
|
+
team: z.ZodOptional<z.ZodString>;
|
|
1462
|
+
individual: z.ZodOptional<z.ZodString>;
|
|
1463
|
+
}, z.core.$strip>>;
|
|
1464
|
+
agentic: z.ZodOptional<z.ZodObject<{
|
|
1465
|
+
headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
1466
|
+
allowedOrigins: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
1467
|
+
}, z.core.$strip>>;
|
|
1468
|
+
artifacts: z.ZodOptional<z.ZodObject<{
|
|
1469
|
+
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
1470
|
+
dir: z.ZodOptional<z.ZodString>;
|
|
1471
|
+
exclude: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
1472
|
+
}, z.core.$strip>>;
|
|
1473
|
+
taskSource: z.ZodOptional<z.ZodObject<{
|
|
1474
|
+
type: z.ZodOptional<z.ZodEnum<{
|
|
1475
|
+
"content-lake": "content-lake";
|
|
1476
|
+
repo: "repo";
|
|
1477
|
+
}>>;
|
|
1478
|
+
repoTasksPath: z.ZodOptional<z.ZodString>;
|
|
1479
|
+
}, z.core.$strip>>;
|
|
1442
1480
|
triggers: z.ZodOptional<z.ZodObject<{
|
|
1443
1481
|
pr: z.ZodOptional<z.ZodObject<{
|
|
1444
1482
|
mode: z.ZodDefault<z.ZodEnum<{
|
|
@@ -427,12 +427,17 @@ const ScheduleTriggerSchema = TriggerConfigSchema.extend({
|
|
|
427
427
|
/**
 * Where the documentation under evaluation lives: identifies the Sanity
 * project that holds it. The section itself and every field in it are
 * optional.
 *
 * Fields:
 *   - `projectId`, `dataset` — non-empty strings.
 *   - `baseUrl`              — must parse as a URL.
 *   - `studioOrigin`         — must parse as a URL. Added in W0077 Phase 6d
 *     as the config-file replacement for the retired
 *     `--sanity-studio-origin` flag of `ailf run`; at resolution time the
 *     `SANITY_STUDIO_ORIGIN` env var still takes precedence over it.
 */
const SourceConfigSchema = z
    .object({
        projectId: z.string().min(1).optional(),
        dataset: z.string().min(1).optional(),
        baseUrl: z.string().url().optional(),
        studioOrigin: z.string().url().optional(),
    })
    .optional();
|
|
438
443
|
/**
|
|
@@ -445,6 +450,121 @@ const ReportStoreConfigSchema = z
|
|
|
445
450
|
dataset: z.string().min(1),
|
|
446
451
|
})
|
|
447
452
|
.optional();
|
|
453
|
+
/**
 * Publish policy for `ailf run`: decides whether a report is written to the
 * Content Lake when neither `--publish` nor `--no-publish` is passed.
 *
 * `auto` accepts one of:
 *   - "always"    — publish any run that has a report store configured
 *   - "full-runs" — publish non-debug runs (the default)
 *   - "never"     — no automatic publishing; the user must pass --publish
 *
 * `tag` is the fallback for `--publish-tag` when that flag is not given on
 * the command line. The section and both of its fields are optional.
 *
 * @see docs/design-docs/pipeline-command-surface.md §5.3
 */
const PublishConfigSchema = z
    .object({
        auto: z.enum(["always", "full-runs", "never"]).optional(),
        tag: z.string().optional(),
    })
    .optional();
|
|
471
|
+
/**
 * Execution-tier settings — the per-environment values that four retired
 * CLI flags used to carry. The section and all of its fields are optional.
 *
 * Fields:
 *   - `concurrency`        — positive integer.
 *   - `graderReplications` — positive integer.
 *   - `gapAnalysis`        — boolean toggle for gap analysis.
 *   - `apiUrl`             — AILF API URL; must parse as a URL.
 *
 * @see docs/design-docs/pipeline-command-surface.md §5.7 (W0077 Phase 6b)
 */
const ExecutionConfigSchema = z
    .object({
        concurrency: z.number().int().positive().optional(),
        graderReplications: z.number().int().positive().optional(),
        gapAnalysis: z.boolean().optional(),
        apiUrl: z.string().url().optional(),
    })
    .optional();
|
|
486
|
+
/**
 * Task-source selection (W0077 Phase 6h). Config-file replacement for the
 * retired `--task-source` and `--repo-tasks-path` flags of `ailf run`.
 *
 * Fields (both optional, as is the section):
 *   - `type`          — "content-lake" (the default) or "repo". With "repo",
 *                       tasks are loaded from `repoTasksPath`, falling back
 *                       to `<cwd>/.ailf/tasks/` when that is unset.
 *   - `repoTasksPath` — explicit, non-empty path, resolved relative to the
 *                       caller's cwd; it is required to exist on disk.
 *
 * There is currently no env-var fallback: the cascade is config file, then
 * built-in default.
 */
const TaskSourceConfigSchema = z
    .object({
        type: z.enum(["content-lake", "repo"]).optional(),
        repoTasksPath: z.string().min(1).optional(),
    })
    .optional();
|
|
504
|
+
/**
 * Artifact-writer settings (W0077 Phase 6g). Config-file replacement for the
 * retired `--no-artifacts`, `--artifacts-dir` and `--artifacts-exclude`
 * flags of `ailf run`.
 *
 * Fields (all optional, as is the section):
 *   - `enabled` — defaults to `true` (writers attached). `false` disables
 *                 every writer, matching the old `--no-artifacts` behavior.
 *   - `dir`     — non-empty path. The `AILF_ARTIFACTS_DIR` env var still
 *                 takes precedence over it at resolution time.
 *   - `exclude` — list of non-empty strings.
 *
 * Other commands (`ailf runs export`, etc.) keep their own
 * `--artifacts-dir` flag: there it means "read from this directory", which
 * is a different thing from the pipeline's write-side `artifacts.dir`.
 */
const ArtifactsConfigSchema = z
    .object({
        enabled: z.boolean().optional(),
        dir: z.string().min(1).optional(),
        exclude: z.array(z.string().min(1)).optional(),
    })
    .optional();
|
|
523
|
+
/**
 * Owner attribution (W0077 Phase 6f). Config-file replacement for the
 * retired `--owner-team` and `--owner-individual` flags. Both values feed
 * the D0037 caller envelope that surfaces in remote-mode runs.
 *
 * `team` and `individual` are optional non-empty strings; the section is
 * optional too. The `AILF_OWNER_TEAM` and `AILF_OWNER_INDIVIDUAL` env vars
 * still take precedence over these values at resolution time.
 */
const OwnerConfigSchema = z
    .object({
        team: z.string().min(1).optional(),
        individual: z.string().min(1).optional(),
    })
    .optional();
|
|
535
|
+
/**
 * Agentic-mode settings (W0077 Phase 6f). Config-file replacement for the
 * retired `--header` and `--allowed-origin` flags.
 *
 * Fields (both optional, as is the section):
 *   - `headers`        — string-to-string map, the same shape as the JSON
 *                        carried by the `DOC_HEADERS` env var.
 *   - `allowedOrigins` — list of non-empty origin globs.
 *
 * The `DOC_HEADERS` and `DOC_ALLOWED_ORIGIN(S)` env vars continue to apply
 * downstream, merged additively with these values.
 */
const AgenticConfigSchema = z
    .object({
        headers: z.record(z.string(), z.string()).optional(),
        allowedOrigins: z.array(z.string().min(1)).optional(),
    })
    .optional();
|
|
548
|
+
/**
 * Output-directory setting. Config-file replacement for the retired
 * `--output-dir` flag of `ailf run`.
 *
 * Resolution order:
 *
 *   .ailf/config.yaml `output.dir`  >  built-in default
 *
 * `dir` is an optional non-empty path, resolved relative to the caller's
 * cwd. The built-in default is `<cwd>/.ailf/results/latest/` (see
 * resolve-output-dir.ts).
 *
 * Other commands (`ailf publish`, `ailf pr-comment`, etc.) keep their own
 * `--output-dir` flag: there it means "read from this directory", which is
 * a different thing from the pipeline's write-side `output.dir`.
 *
 * @see docs/design-docs/pipeline-command-surface.md §5.7 (W0077 Phase 6c)
 * @see docs/design-docs/output-dir-routing.md
 */
const OutputConfigSchema = z
    .object({
        dir: z.string().min(1).optional(),
    })
    .optional();
|
|
448
568
|
/**
|
|
449
569
|
* Zod schema for .ailf/config.yaml — controls documentation source,
|
|
450
570
|
* report destination, and trigger behavior for evaluations from an
|
|
@@ -453,6 +573,13 @@ const ReportStoreConfigSchema = z
|
|
|
453
573
|
export const RepoConfigSchema = z.object({
|
|
454
574
|
source: SourceConfigSchema,
|
|
455
575
|
reportStore: ReportStoreConfigSchema,
|
|
576
|
+
publish: PublishConfigSchema,
|
|
577
|
+
execution: ExecutionConfigSchema,
|
|
578
|
+
output: OutputConfigSchema,
|
|
579
|
+
owner: OwnerConfigSchema,
|
|
580
|
+
agentic: AgenticConfigSchema,
|
|
581
|
+
artifacts: ArtifactsConfigSchema,
|
|
582
|
+
taskSource: TaskSourceConfigSchema,
|
|
456
583
|
triggers: z
|
|
457
584
|
.object({
|
|
458
585
|
pr: TriggerConfigSchema.optional(),
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* cli-program.ts — pure factory for the AILF Commander program.
|
|
3
|
+
*
|
|
4
|
+
* Splits the program construction out of cli.ts so the CLI is testable
|
|
5
|
+
* in-process. cli.ts owns bootstrap side effects (dotenv loading,
|
|
6
|
+
* retired-flag/env/cmd checks, AILF_LOG_LEVEL pre-scan, parseAsync); this
|
|
7
|
+
* module owns command wiring.
|
|
8
|
+
*
|
|
9
|
+
* The W0078 M4 black-box harness imports `buildCliProgram()` directly so
|
|
10
|
+
* tests can construct a fresh program, attach `exitOverride()`, capture
|
|
11
|
+
* stdout/stderr, and parse a synthetic argv — all without spawning a
|
|
12
|
+
* subprocess.
|
|
13
|
+
*
|
|
14
|
+
* @see packages/eval/src/__tests__/cli-harness/run-cli.ts
|
|
15
|
+
*/
|
|
16
|
+
import { Command } from "commander";
|
|
17
|
+
/**
 * Inputs accepted by `buildCliProgram`.
 */
export interface BuildCliProgramOptions {
    /**
     * Eval package root, i.e. the directory that contains package.json.
     * The version string is resolved from it, and the same path is handed
     * to the `--explain` handler as its root.
     */
    evalRoot: string;
}
|
|
28
|
+
/**
 * Build the Commander program with every subcommand registered.
 *
 * Construction performs no I/O other than reading package.json for the
 * version, and does not read `process.argv`. Tests can call this and attach
 * `program.exitOverride()` before parsing so that exit codes are captured
 * instead of terminating the process.
 *
 * The returned program carries a global `--explain` preAction hook. When
 * that hook completes an explain run it calls `process.exit(0)` itself.
 *
 * Help output lists groups in registration order, and the commands inside a
 * group in the order they were added.
 *
 * @param opts - see {@link BuildCliProgramOptions}.
 * @returns the wired, not-yet-parsed Commander program.
 */
export declare function buildCliProgram(opts: BuildCliProgramOptions): Command;
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* cli-program.ts — pure factory for the AILF Commander program.
|
|
3
|
+
*
|
|
4
|
+
* Splits the program construction out of cli.ts so the CLI is testable
|
|
5
|
+
* in-process. cli.ts owns bootstrap side effects (dotenv loading,
|
|
6
|
+
* retired-flag/env/cmd checks, AILF_LOG_LEVEL pre-scan, parseAsync); this
|
|
7
|
+
* module owns command wiring.
|
|
8
|
+
*
|
|
9
|
+
* The W0078 M4 black-box harness imports `buildCliProgram()` directly so
|
|
10
|
+
* tests can construct a fresh program, attach `exitOverride()`, capture
|
|
11
|
+
* stdout/stderr, and parse a synthetic argv — all without spawning a
|
|
12
|
+
* subprocess.
|
|
13
|
+
*
|
|
14
|
+
* @see packages/eval/src/__tests__/cli-harness/run-cli.ts
|
|
15
|
+
*/
|
|
16
|
+
import { Command } from "commander";
|
|
17
|
+
import { readFileSync } from "node:fs";
|
|
18
|
+
import { resolve } from "node:path";
|
|
19
|
+
import { CommandGroup, configureProgram } from "./commands/shared/help.js";
|
|
20
|
+
import { createAgentReportCommand } from "./commands/agent-report.js";
|
|
21
|
+
import { createBaselineCommand } from "./commands/baseline.js";
|
|
22
|
+
import { createCacheCommand } from "./commands/cache.js";
|
|
23
|
+
import { createCalculateScoresCommand } from "./commands/calculate-scores.js";
|
|
24
|
+
import { createCheckStalenessCommand } from "./commands/check-staleness.js";
|
|
25
|
+
import { createChronicFailuresCommand } from "./commands/chronic-failures.js";
|
|
26
|
+
import { createCompareCommand } from "./commands/compare.js";
|
|
27
|
+
import { createCompletionCommand } from "./commands/completion.js";
|
|
28
|
+
import { createCoverageAuditCommand } from "./commands/coverage-audit.js";
|
|
29
|
+
import { createDiscoveryReportCommand } from "./commands/discovery-report.js";
|
|
30
|
+
import { createEvalCommand } from "./commands/eval.js";
|
|
31
|
+
import { createFetchDocsCommand } from "./commands/fetch-docs.js";
|
|
32
|
+
import { createGenerateConfigsCommand } from "./commands/generate-configs.js";
|
|
33
|
+
import { createGraderCommand } from "./commands/grader/index.js";
|
|
34
|
+
import { createInitCommand } from "./commands/init.js";
|
|
35
|
+
import { createInteractiveCommand } from "./commands/interactive.js";
|
|
36
|
+
import { createLookupDocCommand } from "./commands/lookup-doc.js";
|
|
37
|
+
import { createMeasureRetrievalCommand } from "./commands/measure-retrieval.js";
|
|
38
|
+
import { createPrCommentCommand } from "./commands/pr-comment.js";
|
|
39
|
+
import { createPublishCommand } from "./commands/publish.js";
|
|
40
|
+
import { createReadinessReportCommand } from "./commands/readiness-report.js";
|
|
41
|
+
import { createRunCommand } from "./commands/run.js";
|
|
42
|
+
import { createRunsCommand } from "./commands/runs.js";
|
|
43
|
+
import { createValidateConfigCommand } from "./commands/validate.js";
|
|
44
|
+
import { createValidateTasksCommand } from "./commands/validate-tasks.js";
|
|
45
|
+
import { createWebhookServerCommand } from "./commands/webhook-server.js";
|
|
46
|
+
import { createWeeklyDigestCommand } from "./commands/weekly-digest.js";
|
|
47
|
+
/**
 * Build the `ailf` Commander program with every subcommand attached.
 *
 * Reads `<evalRoot>/package.json` synchronously to obtain the version
 * string and never touches `process.argv`. Callers (tests included) may
 * attach `program.exitOverride()` to the returned program before parsing.
 *
 * A global `preAction` hook implements `--explain`: when the flag is set it
 * lazily imports the explain handler, runs it for the command about to
 * execute, and then calls `process.exit(0)`. If the handler throws an
 * object carrying `__proceedArgv` (the `--yes` confirmation sentinel), the
 * program is re-parsed with that argv instead; anything else is rethrown.
 *
 * NOTE(review): the hook calls `process.exit(0)` directly, so that path is
 * presumably not intercepted by `exitOverride()` — confirm against the
 * in-process test harness.
 * NOTE(review): after the nested `parseAsync` resolves the hook just
 * returns; verify Commander does not then run the original action as well.
 *
 * Groups appear in `--help` in registration order, and commands inside a
 * group in the order they were added.
 *
 * @param {{ evalRoot: string }} opts - `evalRoot` is the eval package root
 *   (the directory containing package.json).
 * @returns {Command} the fully wired program, not yet parsed.
 */
export function buildCliProgram(opts) {
    const { evalRoot } = opts;
    const manifestPath = resolve(evalRoot, "package.json");
    const manifest = JSON.parse(readFileSync(manifestPath, "utf-8"));

    const program = new Command();
    program
        .name("ailf")
        .description("AI Literacy Framework — evaluate how well docs enable AI coding tools\n\nMeasure whether AI coding agents can find the right documentation\nand produce correct implementations of your product features.")
        .version(manifest.version)
        .option("-v, --verbose", "Increase log output")
        .option("-q, --quiet", "Suppress non-error output")
        .option("--dotenv <path>", "Override default .env file path")
        .option("--explain", "Show execution plan without running")
        .option("--format <fmt>", "Output format for --explain (console, json)", "console")
        .option("-y, --yes", "With --explain: show plan then prompt to confirm execution");
    configureProgram(program);

    // Global --explain interception: runs ahead of whichever command was picked.
    program.hook("preAction", async (thisCommand, actionCommand) => {
        const globalOpts = thisCommand.opts();
        if (!globalOpts.explain) {
            return;
        }
        const { handleExplain } = await import("./commands/explain-handler.js");
        try {
            await handleExplain(actionCommand, globalOpts.yes ?? false, evalRoot);
            process.exit(0);
        }
        catch (err) {
            // Only the --yes confirmation sentinel is handled here; every
            // other error propagates unchanged.
            const isProceedSentinel = err !== null && typeof err === "object" && "__proceedArgv" in err;
            if (!isProceedSentinel) {
                throw err;
            }
            const filteredArgv = err.__proceedArgv;
            console.log("\n ▸ Proceeding with execution…\n");
            await program.parseAsync(filteredArgv);
        }
    });

    // Tags a command with its help group, then attaches it to the root program.
    const register = (command, group) => {
        program.addCommand(command.helpGroup(group));
    };

    // ── Core Workflow ──────────────────────────────────────────────────────
    register(createRunCommand(), CommandGroup.CoreWorkflow);
    register(createCompareCommand(), CommandGroup.CoreWorkflow);
    register(createBaselineCommand(), CommandGroup.CoreWorkflow);
    register(createPublishCommand(), CommandGroup.CoreWorkflow);
    register(createRunsCommand(), CommandGroup.CoreWorkflow);

    // ── Analysis & Reports ────────────────────────────────────────────────
    const reportCommand = new Command("report")
        .description("Generate analysis and reporting outputs from evaluation runs");
    const reportSubcommandFactories = [
        createReadinessReportCommand,
        createChronicFailuresCommand,
        createCoverageAuditCommand,
        createDiscoveryReportCommand,
        createAgentReportCommand,
        createWeeklyDigestCommand,
        createCheckStalenessCommand,
    ];
    for (const createSubcommand of reportSubcommandFactories) {
        reportCommand.addCommand(createSubcommand());
    }
    register(reportCommand, CommandGroup.AnalysisReports);

    // ── Grader Reliability ────────────────────────────────────────────────
    register(createGraderCommand(), CommandGroup.GraderReliability);

    // ── Setup & Configuration ─────────────────────────────────────────────
    register(createInitCommand(), CommandGroup.SetupConfig);
    const validateCommand = new Command("validate")
        .description("Validate AILF configuration and task files");
    for (const createSubcommand of [createValidateConfigCommand, createValidateTasksCommand]) {
        validateCommand.addCommand(createSubcommand());
    }
    register(validateCommand, CommandGroup.SetupConfig);
    register(createFetchDocsCommand(), CommandGroup.SetupConfig);
    register(createCacheCommand(), CommandGroup.SetupConfig);

    // ── Pipeline Internals ────────────────────────────────────────────────
    register(createEvalCommand(), CommandGroup.PipelineInternals);
    register(createCalculateScoresCommand(), CommandGroup.PipelineInternals);
    register(createPrCommentCommand(), CommandGroup.PipelineInternals);
    register(createGenerateConfigsCommand(), CommandGroup.PipelineInternals);
    register(createMeasureRetrievalCommand(), CommandGroup.PipelineInternals);
    register(createLookupDocCommand(), CommandGroup.PipelineInternals);
    register(createWebhookServerCommand(), CommandGroup.PipelineInternals);

    // ── Developer Tools ───────────────────────────────────────────────────
    register(createInteractiveCommand(), CommandGroup.DeveloperTools);
    // Shell completion goes last because it needs the full program tree.
    register(createCompletionCommand(program), CommandGroup.DeveloperTools);

    return program;
}
|
package/dist/cli.d.ts
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
* appends Quick Start examples.
|
|
12
12
|
*
|
|
13
13
|
* Usage:
|
|
14
|
-
* ailf
|
|
14
|
+
* ailf run [flags] # full evaluation run
|
|
15
15
|
* ailf compare [flags] # compare evaluation runs
|
|
16
16
|
* ailf baseline <cmd> [flags] # baseline management
|
|
17
17
|
* ailf validate [flags] # config validation
|
|
@@ -24,6 +24,12 @@
|
|
|
24
24
|
* --dotenv <path> # override default .env path
|
|
25
25
|
*
|
|
26
26
|
* Dev mode (without building):
|
|
27
|
-
* tsx src/cli.ts
|
|
27
|
+
* tsx src/cli.ts run --debug
|
|
28
|
+
*
|
|
29
|
+
* Module split: this file owns *bootstrap side effects* (dotenv,
|
|
30
|
+
* retired-flag/env/cmd checks, AILF_LOG_LEVEL pre-scan, parseAsync).
|
|
31
|
+
* The Commander wiring lives in ./cli-program.ts so the W0078 M4 black-box
|
|
32
|
+
* harness can build the program in-process without firing those side
|
|
33
|
+
* effects.
|
|
28
34
|
*/
|
|
29
35
|
export {};
|