@sanity/ailf 3.7.0 → 3.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
- package/config/thresholds.ts +3 -3
- package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
- package/dist/_vendor/ailf-core/examples/index.js +2 -2
- package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
- package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
- package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
- package/dist/_vendor/ailf-shared/run-classification.js +1 -1
- package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
- package/dist/adapters/api-client/build-request.d.ts +0 -2
- package/dist/adapters/api-client/build-request.js +2 -6
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
- package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
- package/dist/adapters/config-sources/file-config-adapter.js +42 -17
- package/dist/adapters/task-sources/repo-schemas.d.ts +41 -3
- package/dist/adapters/task-sources/repo-schemas.js +127 -0
- package/dist/cli-program.d.ts +39 -0
- package/dist/cli-program.js +137 -0
- package/dist/cli.d.ts +8 -2
- package/dist/cli.js +128 -142
- package/dist/commands/agent-report.js +1 -1
- package/dist/commands/calculate-scores.js +0 -2
- package/dist/commands/check-staleness.js +1 -1
- package/dist/commands/chronic-failures.js +4 -4
- package/dist/commands/coverage-audit.js +6 -7
- package/dist/commands/discovery-report.js +16 -4
- package/dist/commands/eval.d.ts +1 -1
- package/dist/commands/eval.js +1 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +13 -44
- package/dist/commands/fetch-docs.js +0 -2
- package/dist/commands/generate-configs.js +0 -2
- package/dist/commands/grader/index.js +3 -3
- package/dist/commands/init.d.ts +2 -2
- package/dist/commands/init.js +10 -9
- package/dist/commands/interactive.d.ts +1 -1
- package/dist/commands/interactive.js +8 -8
- package/dist/commands/pipeline-action.d.ts +1 -3
- package/dist/commands/pipeline-action.js +174 -140
- package/dist/commands/pr-comment.js +1 -3
- package/dist/commands/publish.d.ts +1 -1
- package/dist/commands/publish.js +2 -4
- package/dist/commands/readiness-report.js +17 -8
- package/dist/commands/remote-pipeline.d.ts +1 -1
- package/dist/commands/remote-pipeline.js +1 -3
- package/dist/commands/run.d.ts +64 -0
- package/dist/commands/{pipeline.js → run.js} +19 -30
- package/dist/commands/shared/help.js +4 -4
- package/dist/commands/shared/options.d.ts +29 -3
- package/dist/commands/shared/options.js +37 -13
- package/dist/commands/validate-tasks.js +1 -1
- package/dist/commands/validate.d.ts +1 -1
- package/dist/commands/validate.js +2 -2
- package/dist/commands/weekly-digest.js +3 -3
- package/dist/config/thresholds.ts +3 -3
- package/dist/orchestration/build-app-context.js +0 -2
- package/dist/orchestration/build-step-sequence.js +1 -11
- package/dist/orchestration/steps/fetch-docs-step.js +1 -1
- package/dist/orchestration/steps/index.d.ts +0 -2
- package/dist/orchestration/steps/index.js +0 -2
- package/dist/orchestration/steps/run-eval-step.js +1 -1
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/map-request-to-config.js +0 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/dist/pipeline/plan.d.ts +2 -4
- package/dist/pipeline/plan.js +4 -32
- package/dist/pipeline/run-context.d.ts +1 -1
- package/dist/pipeline/run-context.js +4 -4
- package/dist/pipeline/validate.d.ts +1 -1
- package/dist/pipeline/validate.js +1 -1
- package/package.json +11 -9
- package/dist/commands/pipeline.d.ts +0 -77
- package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
- package/dist/orchestration/steps/discovery-report-step.js +0 -62
- package/dist/orchestration/steps/readiness-step.d.ts +0 -13
- package/dist/orchestration/steps/readiness-step.js +0 -98
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509
package/dist/pipeline/plan.d.ts
CHANGED
|
@@ -61,7 +61,7 @@ export interface DebugPlan {
|
|
|
61
61
|
export interface ExecutionPlan {
|
|
62
62
|
/** Cache prediction per step */
|
|
63
63
|
cacheStatus: Record<string, "hit" | "miss" | "skipped" | "unknown">;
|
|
64
|
-
/** Command name (e.g., "
|
|
64
|
+
/** Command name (e.g., "run", "compare", "validate") */
|
|
65
65
|
command: string;
|
|
66
66
|
/** Comparison plan (when --compare is set) */
|
|
67
67
|
comparison?: ComparisonPlan;
|
|
@@ -134,7 +134,6 @@ export interface PlanOptions {
|
|
|
134
134
|
compareThreshold?: number;
|
|
135
135
|
concurrency?: number;
|
|
136
136
|
debug?: DebugOptions;
|
|
137
|
-
discoveryReportEnabled: boolean;
|
|
138
137
|
dryRun: boolean;
|
|
139
138
|
gapAnalysisEnabled: boolean;
|
|
140
139
|
graderReplications?: number;
|
|
@@ -143,7 +142,6 @@ export interface PlanOptions {
|
|
|
143
142
|
variant?: string;
|
|
144
143
|
noCache: boolean;
|
|
145
144
|
publishEnabled: boolean;
|
|
146
|
-
readinessEnabled: boolean;
|
|
147
145
|
repoTasksPath?: string;
|
|
148
146
|
skipEval: boolean;
|
|
149
147
|
skipFetch: boolean;
|
|
@@ -152,7 +150,7 @@ export interface PlanOptions {
|
|
|
152
150
|
taskOption?: string;
|
|
153
151
|
}
|
|
154
152
|
/**
|
|
155
|
-
* Build a complete execution plan for the `
|
|
153
|
+
* Build a complete execution plan for the `run` command.
|
|
156
154
|
*
|
|
157
155
|
* This is a read-only operation — it computes the plan by calling existing
|
|
158
156
|
* pure functions (task expansion, model loading, cache lookup, pricing)
|
package/dist/pipeline/plan.js
CHANGED
|
@@ -99,7 +99,7 @@ const AVG_TOKENS = {
|
|
|
99
99
|
// Cache prediction
|
|
100
100
|
// ---------------------------------------------------------------------------
|
|
101
101
|
/**
|
|
102
|
-
* Build a complete execution plan for the `
|
|
102
|
+
* Build a complete execution plan for the `run` command.
|
|
103
103
|
*
|
|
104
104
|
* This is a read-only operation — it computes the plan by calling existing
|
|
105
105
|
* pure functions (task expansion, model loading, cache lookup, pricing)
|
|
@@ -322,13 +322,11 @@ export async function buildPipelinePlan(opts, rootDir) {
|
|
|
322
322
|
// 7. Build step plan
|
|
323
323
|
const steps = buildStepPlan({
|
|
324
324
|
compareEnabled: opts.compareEnabled,
|
|
325
|
-
discoveryReportEnabled: opts.discoveryReportEnabled,
|
|
326
325
|
dryRun: opts.dryRun,
|
|
327
326
|
gapAnalysisEnabled: opts.gapAnalysisEnabled,
|
|
328
327
|
graderReplications: opts.graderReplications,
|
|
329
328
|
noCache: opts.noCache,
|
|
330
329
|
publishEnabled: opts.publishEnabled,
|
|
331
|
-
readinessEnabled: opts.readinessEnabled,
|
|
332
330
|
skipEval: opts.skipEval,
|
|
333
331
|
skipFetch: opts.skipFetch,
|
|
334
332
|
}, cachePrediction);
|
|
@@ -338,14 +336,12 @@ export async function buildPipelinePlan(opts, rootDir) {
|
|
|
338
336
|
const filesRead = collectFilesRead(rootDir, opts.mode);
|
|
339
337
|
const filesCreated = collectFilesCreated({
|
|
340
338
|
compareEnabled: opts.compareEnabled,
|
|
341
|
-
discoveryReportEnabled: opts.discoveryReportEnabled,
|
|
342
339
|
gapAnalysisEnabled: opts.gapAnalysisEnabled,
|
|
343
340
|
publishEnabled: opts.publishEnabled,
|
|
344
|
-
readinessEnabled: opts.readinessEnabled,
|
|
345
341
|
});
|
|
346
342
|
return {
|
|
347
343
|
cacheStatus: cachePrediction.predictions,
|
|
348
|
-
command: "
|
|
344
|
+
command: "run",
|
|
349
345
|
comparison,
|
|
350
346
|
costEstimate,
|
|
351
347
|
debug: debugPlan,
|
|
@@ -486,7 +482,7 @@ function buildStepPlan(opts, cachePrediction) {
|
|
|
486
482
|
estimatedSavedMs: fetchStatus === "hit" ? cachePrediction.estimatedSavedMs : undefined,
|
|
487
483
|
name: "Fetch documentation",
|
|
488
484
|
reason: fetchStatus === "skipped"
|
|
489
|
-
? "--
|
|
485
|
+
? "--no-fetch: reuse cached contexts"
|
|
490
486
|
: fetchStatus === "hit"
|
|
491
487
|
? "CACHED (inputs unchanged)"
|
|
492
488
|
: "Fetch from Sanity Content Lake",
|
|
@@ -508,7 +504,7 @@ function buildStepPlan(opts, cachePrediction) {
|
|
|
508
504
|
cacheStatus: evalStatus,
|
|
509
505
|
name: "Run evaluation",
|
|
510
506
|
reason: opts.skipEval
|
|
511
|
-
? "--
|
|
507
|
+
? "--no-eval: reuse existing results"
|
|
512
508
|
: evalStatus === "hit"
|
|
513
509
|
? "CACHED (inputs unchanged)"
|
|
514
510
|
: "Execute Promptfoo evaluation against all models",
|
|
@@ -567,24 +563,6 @@ function buildStepPlan(opts, cachePrediction) {
|
|
|
567
563
|
willRun: true,
|
|
568
564
|
});
|
|
569
565
|
}
|
|
570
|
-
// Step 6: Readiness report (optional)
|
|
571
|
-
if (opts.readinessEnabled) {
|
|
572
|
-
steps.push({
|
|
573
|
-
cacheStatus: "miss",
|
|
574
|
-
name: "Readiness report",
|
|
575
|
-
reason: "Generate launch readiness checklist",
|
|
576
|
-
willRun: true,
|
|
577
|
-
});
|
|
578
|
-
}
|
|
579
|
-
// Step 6c: Discovery report (optional)
|
|
580
|
-
if (opts.discoveryReportEnabled) {
|
|
581
|
-
steps.push({
|
|
582
|
-
cacheStatus: "miss",
|
|
583
|
-
name: "Discovery report",
|
|
584
|
-
reason: "Analyze agent discoverability from retrieval metrics",
|
|
585
|
-
willRun: true,
|
|
586
|
-
});
|
|
587
|
-
}
|
|
588
566
|
return steps;
|
|
589
567
|
}
|
|
590
568
|
function collectFilesCreated(opts) {
|
|
@@ -601,12 +579,6 @@ function collectFilesCreated(opts) {
|
|
|
601
579
|
files.push("results/latest/failure-modes.json");
|
|
602
580
|
files.push("results/latest/gap-analysis.json");
|
|
603
581
|
}
|
|
604
|
-
if (opts.readinessEnabled) {
|
|
605
|
-
files.push("results/latest/readiness-report.md");
|
|
606
|
-
}
|
|
607
|
-
if (opts.discoveryReportEnabled) {
|
|
608
|
-
files.push("results/latest/discovery-report.md");
|
|
609
|
-
}
|
|
610
582
|
return files.sort();
|
|
611
583
|
}
|
|
612
584
|
// ---------------------------------------------------------------------------
|
|
@@ -85,7 +85,7 @@ export interface RunContextInput {
|
|
|
85
85
|
export declare function buildRunContext(input: RunContextInput): RunContext;
|
|
86
86
|
/**
|
|
87
87
|
* Resolve `classification` from `AILF_CLASSIFICATION`, validated against
|
|
88
|
-
* the closed enum. Defaults to `"
|
|
88
|
+
* the closed enum. Defaults to `"adhoc"` so unannotated runs never leak
|
|
89
89
|
* into the canonical `"official"` series.
|
|
90
90
|
*/
|
|
91
91
|
export declare function detectClassification(log: Logger): RunClassification;
|
|
@@ -169,17 +169,17 @@ function detectTrigger() {
|
|
|
169
169
|
// ---------------------------------------------------------------------------
|
|
170
170
|
/**
|
|
171
171
|
* Resolve `classification` from `AILF_CLASSIFICATION`, validated against
|
|
172
|
-
* the closed enum. Defaults to `"
|
|
172
|
+
* the closed enum. Defaults to `"adhoc"` so unannotated runs never leak
|
|
173
173
|
* into the canonical `"official"` series.
|
|
174
174
|
*/
|
|
175
175
|
export function detectClassification(log) {
|
|
176
176
|
const raw = process.env.AILF_CLASSIFICATION?.trim();
|
|
177
177
|
if (!raw)
|
|
178
|
-
return "
|
|
178
|
+
return "adhoc";
|
|
179
179
|
if (isRunClassification(raw))
|
|
180
180
|
return raw;
|
|
181
|
-
log.warn(`AILF_CLASSIFICATION="${raw}" is not a recognized value; defaulting to "
|
|
182
|
-
return "
|
|
181
|
+
log.warn(`AILF_CLASSIFICATION="${raw}" is not a recognized value; defaulting to "adhoc"`);
|
|
182
|
+
return "adhoc";
|
|
183
183
|
}
|
|
184
184
|
/**
|
|
185
185
|
* Resolve `owner` from `AILF_OWNER_TEAM` (+ optional
|
|
@@ -63,6 +63,6 @@ export declare function validateTaskFiles(rootDir: string): ValidationIssue[];
|
|
|
63
63
|
*
|
|
64
64
|
* Returns warnings (not errors) if the file is missing — thresholds are
|
|
65
65
|
* optional and don't block evaluation. They only activate when
|
|
66
|
-
*
|
|
66
|
+
* `ailf report readiness` or severity-aware sink routing is used.
|
|
67
67
|
*/
|
|
68
68
|
export declare function validateThresholdsYaml(rootDir: string): ValidationIssue[];
|
|
@@ -272,7 +272,7 @@ export function validateTaskFiles(rootDir) {
|
|
|
272
272
|
*
|
|
273
273
|
* Returns warnings (not errors) if the file is missing — thresholds are
|
|
274
274
|
* optional and don't block evaluation. They only activate when
|
|
275
|
-
*
|
|
275
|
+
* `ailf report readiness` or severity-aware sink routing is used.
|
|
276
276
|
*/
|
|
277
277
|
export function validateThresholdsYaml(rootDir) {
|
|
278
278
|
const source = "validateThresholdsYaml";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sanity/ailf",
|
|
3
|
-
"version": "3.7.0",
|
|
3
|
+
"version": "3.8.1",
|
|
4
4
|
"private": false,
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -50,6 +50,7 @@
|
|
|
50
50
|
"@anthropic-ai/claude-agent-sdk": "^0.2.105",
|
|
51
51
|
"@types/js-yaml": "^4.0.9",
|
|
52
52
|
"@types/node": "^22.13.1",
|
|
53
|
+
"nock": "^14.0.13",
|
|
53
54
|
"tsx": "^4.19.2",
|
|
54
55
|
"typescript": "^5.7.3",
|
|
55
56
|
"@sanity/ailf-core": "0.1.0",
|
|
@@ -67,20 +68,21 @@
|
|
|
67
68
|
"grader-compare": "tsx src/cli.ts grader compare",
|
|
68
69
|
"grader-sensitivity": "tsx src/cli.ts grader sensitivity",
|
|
69
70
|
"calculate-scores": "tsx src/cli.ts calculate-scores",
|
|
70
|
-
"agent-report": "tsx src/cli.ts agent
|
|
71
|
+
"agent-report": "tsx src/cli.ts report agent",
|
|
71
72
|
"share": "dotenv -e ../../.env -o -- promptfoo share",
|
|
72
73
|
"view": "dotenv -e ../../.env -o -- promptfoo view",
|
|
73
74
|
"cli": "tsx src/cli.ts",
|
|
74
75
|
"pipeline": "tsx src/cli.ts pipeline",
|
|
75
|
-
"validate": "tsx src/cli.ts validate",
|
|
76
|
-
"test": "tsx --test src/__tests__/*.test.ts",
|
|
76
|
+
"validate": "tsx src/cli.ts validate config",
|
|
77
|
+
"test": "tsx --test src/__tests__/*.test.ts src/adapters/**/__tests__/*.adapter.test.ts",
|
|
77
78
|
"test:e2e": "AILF_E2E=1 tsx --test src/__tests__/e2e/*.e2e.test.ts",
|
|
78
|
-
"test:
|
|
79
|
+
"test:e2e:adapters": "AILF_E2E=1 tsx --test src/adapters/**/__tests__/*.adapter.test.ts",
|
|
80
|
+
"test:all": "AILF_E2E=1 tsx --test src/__tests__/*.test.ts src/pipeline/compiler/__tests__/*.test.ts src/__tests__/e2e/*.e2e.test.ts src/adapters/**/__tests__/*.adapter.test.ts",
|
|
79
81
|
"pr-comment": "tsx src/cli.ts pr-comment",
|
|
80
|
-
"coverage-audit": "tsx src/cli.ts coverage
|
|
81
|
-
"readiness-report": "tsx src/cli.ts readiness
|
|
82
|
-
"discovery-report": "tsx src/cli.ts discovery
|
|
82
|
+
"coverage-audit": "tsx src/cli.ts report coverage",
|
|
83
|
+
"readiness-report": "tsx src/cli.ts report readiness",
|
|
84
|
+
"discovery-report": "tsx src/cli.ts report discovery",
|
|
83
85
|
"webhook-server": "tsx src/cli.ts webhook-server",
|
|
84
|
-
"weekly-digest": "tsx src/cli.ts
|
|
86
|
+
"weekly-digest": "tsx src/cli.ts report digest"
|
|
85
87
|
}
|
|
86
88
|
}
|
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* pipeline command — the main evaluation pipeline orchestrator.
|
|
3
|
-
*
|
|
4
|
-
* Defines all 36+ CLI flags via Commander, resolves them into a typed
|
|
5
|
-
* options object, bridges to process.env for downstream modules, and
|
|
6
|
-
* delegates to runPipeline().
|
|
7
|
-
*
|
|
8
|
-
* @see docs/cli.md for the full flag reference.
|
|
9
|
-
*/
|
|
10
|
-
import { Command } from "commander";
|
|
11
|
-
/**
|
|
12
|
-
* Raw CLI options as parsed by Commander.
|
|
13
|
-
* Field names follow Commander's camelCase convention for kebab-case flags.
|
|
14
|
-
*/
|
|
15
|
-
export interface PipelineCliOptions {
|
|
16
|
-
allowedOrigin: string[];
|
|
17
|
-
allowedOrigins: string[];
|
|
18
|
-
area?: string;
|
|
19
|
-
autoScope: boolean;
|
|
20
|
-
before?: string;
|
|
21
|
-
cache: boolean;
|
|
22
|
-
changedDocs?: string;
|
|
23
|
-
compare: boolean;
|
|
24
|
-
compareBaseline?: string;
|
|
25
|
-
concurrency?: number;
|
|
26
|
-
config?: string;
|
|
27
|
-
debug: boolean;
|
|
28
|
-
debugN?: number;
|
|
29
|
-
debugPattern?: string;
|
|
30
|
-
debugSample?: number;
|
|
31
|
-
discoveryReport: boolean;
|
|
32
|
-
dryRun: boolean;
|
|
33
|
-
gapAnalysis: boolean;
|
|
34
|
-
graderReplications?: number;
|
|
35
|
-
header: string[];
|
|
36
|
-
headers: string[];
|
|
37
|
-
mode: string;
|
|
38
|
-
variant?: string;
|
|
39
|
-
output?: string;
|
|
40
|
-
outputDir?: string;
|
|
41
|
-
promptfooUrl?: string;
|
|
42
|
-
publish?: boolean;
|
|
43
|
-
publishTag?: string;
|
|
44
|
-
readiness: boolean;
|
|
45
|
-
remoteCache?: boolean;
|
|
46
|
-
reportDataset?: string;
|
|
47
|
-
reportProject?: string;
|
|
48
|
-
sanityDataset?: string;
|
|
49
|
-
sanityDocument: string[];
|
|
50
|
-
sanityDocuments: string[];
|
|
51
|
-
sanityPerspective?: string;
|
|
52
|
-
sanityProject?: string;
|
|
53
|
-
sanityStudioOrigin?: string;
|
|
54
|
-
search?: string;
|
|
55
|
-
skipEval: boolean;
|
|
56
|
-
skipFetch: boolean;
|
|
57
|
-
source?: string;
|
|
58
|
-
remote: boolean;
|
|
59
|
-
repoTasksPath?: string;
|
|
60
|
-
task?: string;
|
|
61
|
-
tag: string[];
|
|
62
|
-
taskSource?: string;
|
|
63
|
-
threshold?: number;
|
|
64
|
-
url: string[];
|
|
65
|
-
urls: string[];
|
|
66
|
-
apiUrl?: string;
|
|
67
|
-
artifacts: boolean;
|
|
68
|
-
artifactsDir?: string;
|
|
69
|
-
artifactsDryRun: boolean;
|
|
70
|
-
artifactsExclude?: string;
|
|
71
|
-
classification?: string;
|
|
72
|
-
ownerTeam?: string;
|
|
73
|
-
ownerIndividual?: string;
|
|
74
|
-
purpose?: string;
|
|
75
|
-
label: string[];
|
|
76
|
-
}
|
|
77
|
-
export declare function createPipelineCommand(): Command;
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Pipeline step: Discovery report (agent discoverability analysis).
|
|
3
|
-
*
|
|
4
|
-
* Calls pure functions from pipeline/discovery-report.ts directly.
|
|
5
|
-
* Optional step — failure doesn't stop the pipeline.
|
|
6
|
-
*/
|
|
7
|
-
import { type AppContext, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
8
|
-
export declare class DiscoveryReportStep implements PipelineStep {
|
|
9
|
-
readonly name = "discovery-report";
|
|
10
|
-
readonly optional = true;
|
|
11
|
-
check(): ValidationIssue[];
|
|
12
|
-
execute(ctx: AppContext): Promise<StepResult>;
|
|
13
|
-
}
|
|
@@ -1,62 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Pipeline step: Discovery report (agent discoverability analysis).
|
|
3
|
-
*
|
|
4
|
-
* Calls pure functions from pipeline/discovery-report.ts directly.
|
|
5
|
-
* Optional step — failure doesn't stop the pipeline.
|
|
6
|
-
*/
|
|
7
|
-
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
8
|
-
import { resolve } from "path";
|
|
9
|
-
import { assoc, } from "../../_vendor/ailf-core/index.js";
|
|
10
|
-
import { emitFileContents } from "../../artifact-capture/emit-file.js";
|
|
11
|
-
import { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../../pipeline/discovery-report.js";
|
|
12
|
-
export class DiscoveryReportStep {
|
|
13
|
-
name = "discovery-report";
|
|
14
|
-
optional = true;
|
|
15
|
-
check() {
|
|
16
|
-
return [];
|
|
17
|
-
}
|
|
18
|
-
async execute(ctx) {
|
|
19
|
-
const root = ctx.config.rootDir;
|
|
20
|
-
const start = Date.now();
|
|
21
|
-
try {
|
|
22
|
-
const scoreSummaryPath = resolve(root, "results", "latest", "score-summary.json");
|
|
23
|
-
if (!existsSync(scoreSummaryPath)) {
|
|
24
|
-
return {
|
|
25
|
-
durationMs: Date.now() - start,
|
|
26
|
-
error: "score-summary.json not found",
|
|
27
|
-
status: "failed",
|
|
28
|
-
};
|
|
29
|
-
}
|
|
30
|
-
const scoreSummary = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
|
|
31
|
-
if (!scoreSummary.retrievalMetrics) {
|
|
32
|
-
return {
|
|
33
|
-
status: "skipped",
|
|
34
|
-
reason: "No retrieval metrics in score summary — run an agentic evaluation first",
|
|
35
|
-
};
|
|
36
|
-
}
|
|
37
|
-
const report = generateDiscoveryReport(scoreSummary, ctx.config.areas);
|
|
38
|
-
const md = formatDiscoveryMarkdown(report);
|
|
39
|
-
// Write to outputDir (respects --output-dir)
|
|
40
|
-
mkdirSync(ctx.config.outputDir, { recursive: true });
|
|
41
|
-
const discoveryPath = resolve(ctx.config.outputDir, "discovery-report.md");
|
|
42
|
-
writeFileSync(discoveryPath, md);
|
|
43
|
-
// W0050 — discoveryReport is per-entry keyed by mode.
|
|
44
|
-
await emitFileContents(ctx.artifactWriter, "discoveryReport", assoc(ctx, { mode: ctx.config.mode }), discoveryPath);
|
|
45
|
-
console.log(md);
|
|
46
|
-
const invisible = report.invisibleDocs.length;
|
|
47
|
-
const f1 = report.overall.avgF1.toFixed(2);
|
|
48
|
-
return {
|
|
49
|
-
durationMs: Date.now() - start,
|
|
50
|
-
status: "success",
|
|
51
|
-
summary: `F1=${f1}, ${invisible} invisible doc${invisible === 1 ? "" : "s"}, ${report.recommendations.length} recommendation${report.recommendations.length === 1 ? "" : "s"}`,
|
|
52
|
-
};
|
|
53
|
-
}
|
|
54
|
-
catch (err) {
|
|
55
|
-
return {
|
|
56
|
-
durationMs: Date.now() - start,
|
|
57
|
-
error: err instanceof Error ? err.message : String(err),
|
|
58
|
-
status: "failed",
|
|
59
|
-
};
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
}
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Pipeline step: Launch readiness report.
|
|
3
|
-
*
|
|
4
|
-
* Calls pure functions from pipeline/readiness-report.ts directly.
|
|
5
|
-
* Optional step — failure doesn't stop the pipeline.
|
|
6
|
-
*/
|
|
7
|
-
import { type AppContext, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
8
|
-
export declare class ReadinessStep implements PipelineStep {
|
|
9
|
-
readonly name = "readiness";
|
|
10
|
-
readonly optional = true;
|
|
11
|
-
check(): ValidationIssue[];
|
|
12
|
-
execute(ctx: AppContext): Promise<StepResult>;
|
|
13
|
-
}
|
|
@@ -1,98 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Pipeline step: Launch readiness report.
|
|
3
|
-
*
|
|
4
|
-
* Calls pure functions from pipeline/readiness-report.ts directly.
|
|
5
|
-
* Optional step — failure doesn't stop the pipeline.
|
|
6
|
-
*/
|
|
7
|
-
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
8
|
-
import { resolve } from "path";
|
|
9
|
-
import { tryLoadConfigFile } from "../../pipeline/compiler/config-loader.js";
|
|
10
|
-
import { assoc, } from "../../_vendor/ailf-core/index.js";
|
|
11
|
-
import { emitFileContents } from "../../artifact-capture/emit-file.js";
|
|
12
|
-
import { formatReadinessMarkdown, generateReadinessReport, } from "../../pipeline/readiness-report.js";
|
|
13
|
-
import { ThresholdConfigSchema } from "../../pipeline/schemas.js";
|
|
14
|
-
export class ReadinessStep {
|
|
15
|
-
name = "readiness";
|
|
16
|
-
optional = true;
|
|
17
|
-
check() {
|
|
18
|
-
return [];
|
|
19
|
-
}
|
|
20
|
-
async execute(ctx) {
|
|
21
|
-
const root = ctx.config.rootDir;
|
|
22
|
-
const start = Date.now();
|
|
23
|
-
try {
|
|
24
|
-
const scoreSummaryPath = resolve(root, "results", "latest", "score-summary.json");
|
|
25
|
-
if (!existsSync(scoreSummaryPath)) {
|
|
26
|
-
return {
|
|
27
|
-
durationMs: Date.now() - start,
|
|
28
|
-
error: "score-summary.json not found",
|
|
29
|
-
status: "failed",
|
|
30
|
-
};
|
|
31
|
-
}
|
|
32
|
-
const thresholdsLoaded = tryLoadConfigFile("thresholds", root);
|
|
33
|
-
if (!thresholdsLoaded) {
|
|
34
|
-
return {
|
|
35
|
-
durationMs: Date.now() - start,
|
|
36
|
-
error: "config/thresholds not found",
|
|
37
|
-
status: "failed",
|
|
38
|
-
};
|
|
39
|
-
}
|
|
40
|
-
const scoreSummary = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
|
|
41
|
-
const thresholdConfig = ThresholdConfigSchema.parse(thresholdsLoaded.data);
|
|
42
|
-
// Read gap-analysis.json from outputDir (gap-analysis step writes there)
|
|
43
|
-
const gapPath = resolve(ctx.config.outputDir, "gap-analysis.json");
|
|
44
|
-
const gapAnalysis = existsSync(gapPath)
|
|
45
|
-
? JSON.parse(readFileSync(gapPath, "utf-8"))
|
|
46
|
-
: undefined;
|
|
47
|
-
const readinessAreas = ctx.config.areas ?? scoreSummary.scores.map((s) => s.feature);
|
|
48
|
-
const readinessLines = [];
|
|
49
|
-
for (const area of readinessAreas) {
|
|
50
|
-
const areaScore = scoreSummary.scores.find((s) => s.feature === area);
|
|
51
|
-
if (!areaScore) {
|
|
52
|
-
ctx.logger.warn(`Area "${area}" not found in scores — skipping`);
|
|
53
|
-
continue;
|
|
54
|
-
}
|
|
55
|
-
const report = generateReadinessReport({
|
|
56
|
-
area,
|
|
57
|
-
gapAnalysis,
|
|
58
|
-
scoreSummary,
|
|
59
|
-
thresholdConfig,
|
|
60
|
-
});
|
|
61
|
-
const md = formatReadinessMarkdown(report);
|
|
62
|
-
readinessLines.push(md);
|
|
63
|
-
console.log(md);
|
|
64
|
-
}
|
|
65
|
-
if (readinessLines.length > 0) {
|
|
66
|
-
// Write to outputDir (respects --output-dir)
|
|
67
|
-
mkdirSync(ctx.config.outputDir, { recursive: true });
|
|
68
|
-
const readinessPath = resolve(ctx.config.outputDir, "readiness-report.md");
|
|
69
|
-
writeFileSync(readinessPath, readinessLines.join("\n---\n\n"));
|
|
70
|
-
// W0050 — readinessReport is run-scoped bulk markdown.
|
|
71
|
-
await emitFileContents(ctx.artifactWriter, "readinessReport", assoc(ctx), readinessPath);
|
|
72
|
-
}
|
|
73
|
-
const passCount = readinessAreas.filter((area) => {
|
|
74
|
-
const areaScore = scoreSummary.scores.find((s) => s.feature === area);
|
|
75
|
-
if (!areaScore)
|
|
76
|
-
return false;
|
|
77
|
-
const report = generateReadinessReport({
|
|
78
|
-
area,
|
|
79
|
-
scoreSummary,
|
|
80
|
-
thresholdConfig,
|
|
81
|
-
});
|
|
82
|
-
return report.pass;
|
|
83
|
-
}).length;
|
|
84
|
-
return {
|
|
85
|
-
durationMs: Date.now() - start,
|
|
86
|
-
status: "success",
|
|
87
|
-
summary: `${passCount}/${readinessAreas.length} areas ready`,
|
|
88
|
-
};
|
|
89
|
-
}
|
|
90
|
-
catch (err) {
|
|
91
|
-
return {
|
|
92
|
-
durationMs: Date.now() - start,
|
|
93
|
-
error: err instanceof Error ? err.message : String(err),
|
|
94
|
-
status: "failed",
|
|
95
|
-
};
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
}
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* agent-harness-handler.test.ts — Tests for agent harness mode compilation.
|
|
3
|
-
*
|
|
4
|
-
* Tests validation, provider assembly, tool permission resolution,
|
|
5
|
-
* assertion mapping, sandbox config, lifecycle extensions, and
|
|
6
|
-
* end-to-end compilation of example tasks.
|
|
7
|
-
*
|
|
8
|
-
* Run: npx tsx --test src/pipeline/compiler/__tests__/agent-harness-handler.test.ts
|
|
9
|
-
*/
|
|
10
|
-
export {};
|