@sanity/ailf 2.8.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/artifact-capture/association.d.ts +35 -0
- package/dist/_vendor/ailf-core/artifact-capture/association.js +28 -0
- package/dist/_vendor/ailf-core/artifact-registry.d.ts +124 -23
- package/dist/_vendor/ailf-core/artifact-registry.js +708 -64
- package/dist/_vendor/ailf-core/batch-signing.d.ts +64 -0
- package/dist/_vendor/ailf-core/batch-signing.js +23 -0
- package/dist/_vendor/ailf-core/index.d.ts +3 -2
- package/dist/_vendor/ailf-core/index.js +3 -2
- package/dist/_vendor/ailf-core/ports/artifact-writer.d.ts +59 -20
- package/dist/_vendor/ailf-core/ports/artifact-writer.js +33 -10
- package/dist/_vendor/ailf-core/ports/context.d.ts +20 -17
- package/dist/_vendor/ailf-core/ports/index.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +6 -6
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/services/index.js +1 -0
- package/dist/_vendor/ailf-core/services/slim-report-summary.d.ts +31 -0
- package/dist/_vendor/ailf-core/services/slim-report-summary.js +217 -0
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +33 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +202 -23
- package/dist/adapters/config-sources/file-config-adapter.js +0 -4
- package/dist/artifact-capture/accumulating-artifact-writer.d.ts +50 -0
- package/dist/artifact-capture/accumulating-artifact-writer.js +111 -0
- package/dist/artifact-capture/api-gateway-artifact-writer.d.ts +17 -4
- package/dist/artifact-capture/api-gateway-artifact-writer.js +58 -7
- package/dist/artifact-capture/emit-file.d.ts +28 -0
- package/dist/artifact-capture/emit-file.js +56 -0
- package/dist/artifact-capture/fanout-artifact-writer.d.ts +39 -0
- package/dist/artifact-capture/fanout-artifact-writer.js +76 -0
- package/dist/artifact-capture/gcs-artifact-writer.d.ts +40 -3
- package/dist/artifact-capture/gcs-artifact-writer.js +238 -14
- package/dist/artifact-capture/local-fs-artifact-writer.d.ts +71 -0
- package/dist/artifact-capture/local-fs-artifact-writer.js +273 -0
- package/dist/artifact-capture/redact-artifact.d.ts +3 -5
- package/dist/artifact-capture/redact-artifact.js +3 -5
- package/dist/cli.js +56 -2
- package/dist/commands/explain-handler.js +4 -4
- package/dist/commands/pipeline-action.d.ts +5 -4
- package/dist/commands/pipeline-action.js +33 -16
- package/dist/commands/pipeline.d.ts +4 -4
- package/dist/commands/pipeline.js +4 -4
- package/dist/commands/publish.js +4 -1
- package/dist/commands/runs.d.ts +18 -0
- package/dist/commands/runs.js +71 -0
- package/dist/composition-root.d.ts +13 -10
- package/dist/composition-root.js +74 -46
- package/dist/orchestration/build-app-context.js +4 -7
- package/dist/orchestration/pipeline-orchestrator.d.ts +1 -1
- package/dist/orchestration/pipeline-orchestrator.js +37 -46
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -1
- package/dist/orchestration/steps/calculate-scores-step.js +19 -19
- package/dist/orchestration/steps/callback-step.d.ts +1 -1
- package/dist/orchestration/steps/callback-step.js +6 -4
- package/dist/orchestration/steps/compare-step.d.ts +1 -1
- package/dist/orchestration/steps/compare-step.js +4 -2
- package/dist/orchestration/steps/discovery-report-step.d.ts +1 -1
- package/dist/orchestration/steps/discovery-report-step.js +4 -1
- package/dist/orchestration/steps/fetch-docs-step.js +9 -15
- package/dist/orchestration/steps/finalize-run-step.js +21 -7
- package/dist/orchestration/steps/gap-analysis-step.js +34 -6
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -1
- package/dist/orchestration/steps/generate-configs-step.js +11 -11
- package/dist/orchestration/steps/publish-report-step.d.ts +1 -1
- package/dist/orchestration/steps/publish-report-step.js +24 -19
- package/dist/orchestration/steps/readiness-step.d.ts +1 -1
- package/dist/orchestration/steps/readiness-step.js +4 -1
- package/dist/orchestration/steps/report-step.d.ts +1 -1
- package/dist/orchestration/steps/report-step.js +6 -3
- package/dist/orchestration/steps/run-eval-step.js +14 -9
- package/dist/pipeline/compare.d.ts +2 -2
- package/dist/pipeline/emit-eval-results.d.ts +38 -0
- package/dist/pipeline/emit-eval-results.js +100 -0
- package/dist/pipeline/map-request-to-config.js +0 -4
- package/package.json +1 -1
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +0 -14
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +0 -25
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +0 -94
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +0 -13
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +0 -138
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +0 -10
- package/dist/artifact-capture/comparator.d.ts +0 -22
- package/dist/artifact-capture/comparator.js +0 -493
- package/dist/artifact-capture/filesystem-collector.d.ts +0 -42
- package/dist/artifact-capture/filesystem-collector.js +0 -237
- package/dist/artifact-capture/gcs-collector.d.ts +0 -55
- package/dist/artifact-capture/gcs-collector.js +0 -117
- package/dist/commands/capture-compare.d.ts +0 -15
- package/dist/commands/capture-compare.js +0 -253
- package/dist/commands/capture-list.d.ts +0 -12
- package/dist/commands/capture-list.js +0 -150
- package/dist/commands/capture.d.ts +0 -9
- package/dist/commands/capture.js +0 -16
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
* each step completes. This enables the GET /v1/jobs/:jobId polling
|
|
12
12
|
* endpoint to show real-time progress.
|
|
13
13
|
*/
|
|
14
|
+
import { assoc, } from "../_vendor/ailf-core/index.js";
|
|
14
15
|
import { runStep } from "./step-runner.js";
|
|
15
16
|
// ---------------------------------------------------------------------------
|
|
16
17
|
// Job progress reporter
|
|
@@ -75,42 +76,39 @@ async function reportJobProgress(ctx, stepName, completedSteps, totalSteps, stat
|
|
|
75
76
|
* Capture a snapshot of the pipeline config, final state, and step results.
|
|
76
77
|
* Strips secrets (API keys, tokens) from the config.
|
|
77
78
|
*/
|
|
78
|
-
function capturePipelineContext(ctx, state, results) {
|
|
79
|
-
if (!ctx.collector.enabled)
|
|
80
|
-
return;
|
|
79
|
+
async function capturePipelineContext(ctx, state, results) {
|
|
81
80
|
const sanitized = Object.fromEntries(Object.entries(ctx.config).filter(([k]) => !/token|secret|key/i.test(k)));
|
|
82
|
-
ctx.collector.capture("pipeline", "pipeline-context",
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
releaseAutoScope: state.releaseAutoScope,
|
|
92
|
-
testSummary: state.testSummary,
|
|
93
|
-
},
|
|
94
|
-
steps: Object.entries(results).map(([name, result]) => ({
|
|
95
|
-
name,
|
|
96
|
-
status: result.status,
|
|
97
|
-
durationMs: result.status !== "skipped" ? result.durationMs : undefined,
|
|
98
|
-
})),
|
|
99
|
-
});
|
|
100
|
-
}
|
|
101
|
-
/**
|
|
102
|
-
* Flush captured artifacts to disk. Non-blocking — failures are logged
|
|
103
|
-
* but never affect the pipeline result.
|
|
104
|
-
*/
|
|
105
|
-
async function flushArtifacts(ctx) {
|
|
106
|
-
if (!ctx.collector.enabled)
|
|
107
|
-
return;
|
|
81
|
+
// W0050 — migrated from ctx.collector.capture("pipeline", "pipeline-context", …)
|
|
82
|
+
// to the registry-driven emit() path. The writer handles redaction,
|
|
83
|
+
// --capture-exclude gating, and local+GCS fanout internally.
|
|
84
|
+
//
|
|
85
|
+
// Awaited (not fire-and-forget) so the write is observable by the
|
|
86
|
+
// orchestrator's caller — a fire-and-forget let the emit fall through
|
|
87
|
+
// to runtime teardown in tests with aggressive afterEach cleanup.
|
|
88
|
+
// `emit` is non-blocking internally (P5): failures return null + warn,
|
|
89
|
+
// never throw, so awaiting can't surface a rejected promise either.
|
|
108
90
|
try {
|
|
109
|
-
|
|
110
|
-
|
|
91
|
+
await ctx.artifactWriter.emit("pipelineContext", assoc(ctx), {
|
|
92
|
+
config: sanitized,
|
|
93
|
+
state: {
|
|
94
|
+
reportId: state.reportId,
|
|
95
|
+
evalFingerprint: state.evalFingerprint,
|
|
96
|
+
belowCritical: state.belowCritical,
|
|
97
|
+
remoteCacheHits: state.remoteCacheHits
|
|
98
|
+
? [...state.remoteCacheHits]
|
|
99
|
+
: undefined,
|
|
100
|
+
releaseAutoScope: state.releaseAutoScope,
|
|
101
|
+
testSummary: state.testSummary,
|
|
102
|
+
},
|
|
103
|
+
steps: Object.entries(results).map(([name, result]) => ({
|
|
104
|
+
name,
|
|
105
|
+
status: result.status,
|
|
106
|
+
durationMs: result.status !== "skipped" ? result.durationMs : undefined,
|
|
107
|
+
})),
|
|
108
|
+
});
|
|
111
109
|
}
|
|
112
110
|
catch (err) {
|
|
113
|
-
ctx.logger.
|
|
111
|
+
ctx.logger.debug(`pipelineContext emit rejected: ${err instanceof Error ? err.message : String(err)}`);
|
|
114
112
|
}
|
|
115
113
|
}
|
|
116
114
|
// ---------------------------------------------------------------------------
|
|
@@ -169,13 +167,10 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
169
167
|
step: step.name,
|
|
170
168
|
}, jobUpdates);
|
|
171
169
|
}
|
|
172
|
-
// Capture pipeline context
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
}
|
|
177
|
-
// Flush captured artifacts even on failure (partial capture is useful)
|
|
178
|
-
await flushArtifacts(ctx);
|
|
170
|
+
// Capture pipeline context before exiting. `job-updates` was an
|
|
171
|
+
// observability-only capture not tied to a registered artifact type;
|
|
172
|
+
// dropped in W0050. Use the JobStore path for job telemetry.
|
|
173
|
+
await capturePipelineContext(ctx, state, results);
|
|
179
174
|
return {
|
|
180
175
|
belowCritical: state.belowCritical,
|
|
181
176
|
durationMs: Date.now() - pipelineStart,
|
|
@@ -229,13 +224,9 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
229
224
|
ctx.logger.warn("Failed to report job completion — continuing");
|
|
230
225
|
}
|
|
231
226
|
}
|
|
232
|
-
// Capture pipeline context
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
ctx.collector.capture("job-store", "job-updates", jobUpdates);
|
|
236
|
-
}
|
|
237
|
-
// Flush captured artifacts (non-blocking — failures never affect pipeline result)
|
|
238
|
-
await flushArtifacts(ctx);
|
|
227
|
+
// Capture pipeline context. `job-updates` observability captures were
|
|
228
|
+
// dropped in Slice 6.1 — JobStore is the supported telemetry path.
|
|
229
|
+
await capturePipelineContext(ctx, state, results);
|
|
239
230
|
return {
|
|
240
231
|
belowCritical: state.belowCritical,
|
|
241
232
|
durationMs,
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Calls calculateAndWriteScores() from pipeline/calculate-scores.ts with
|
|
5
5
|
* typed options derived from AppContext. No env bridge needed.
|
|
6
6
|
*/
|
|
7
|
-
import type
|
|
7
|
+
import { type AppContext, type PipelineState, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
8
8
|
export declare class CalculateScoresStep implements PipelineStep {
|
|
9
9
|
readonly name = "calculate-scores";
|
|
10
10
|
check(): ValidationIssue[];
|
|
@@ -6,6 +6,8 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { existsSync, readFileSync } from "node:fs";
|
|
8
8
|
import { join, resolve } from "path";
|
|
9
|
+
import { assoc, } from "../../_vendor/ailf-core/index.js";
|
|
10
|
+
import { emitFileContents } from "../../artifact-capture/emit-file.js";
|
|
9
11
|
import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
|
|
10
12
|
import { getStepInputPaths } from "../../pipeline/cache.js";
|
|
11
13
|
import { buildCacheContext } from "../cache-context.js";
|
|
@@ -122,16 +124,15 @@ export class CalculateScoresStep {
|
|
|
122
124
|
state.belowCritical = belowCritical;
|
|
123
125
|
}
|
|
124
126
|
// Capture score artifacts
|
|
127
|
+
// W0050 — score-summary → scoreSummary (run-scoped bulk).
|
|
128
|
+
// grader-judgments.json and test-results.json were aggregated captures
|
|
129
|
+
// without registered descriptors. graderJudgments is now per-entry
|
|
130
|
+
// ({run, mode, task, model, grader}) and lands via run-eval-step in
|
|
131
|
+
// Slice 6.6; the aggregated file is dropped.
|
|
125
132
|
const resultsDir = join(ctx.config.rootDir, "results", "latest");
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
"test-results.json",
|
|
130
|
-
]) {
|
|
131
|
-
const filePath = join(resultsDir, file);
|
|
132
|
-
if (existsSync(filePath)) {
|
|
133
|
-
ctx.collector.captureFile("calculate-scores", file.replace(".json", ""), filePath);
|
|
134
|
-
}
|
|
133
|
+
const summaryPath = join(resultsDir, "score-summary.json");
|
|
134
|
+
if (existsSync(summaryPath)) {
|
|
135
|
+
await emitFileContents(ctx.artifactWriter, "scoreSummary", assoc(ctx), summaryPath);
|
|
135
136
|
}
|
|
136
137
|
// Upload testOutputs to GCS (D0032 — non-blocking, P5).
|
|
137
138
|
// Read from test-results.json rather than score-summary.json: the
|
|
@@ -142,16 +143,15 @@ export class CalculateScoresStep {
|
|
|
142
143
|
// The full responseOutput lives in the GCS artifact; PublishReportStep
|
|
143
144
|
// later strips it from the inline Content Lake document when this
|
|
144
145
|
// upload succeeds.
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
}
|
|
146
|
+
// W0050 — ctx.artifactWriter is always present; no guard needed.
|
|
147
|
+
const testResults = tryReadTestResults(ctx.config.rootDir);
|
|
148
|
+
if (testResults?.length) {
|
|
149
|
+
const artifactRef = await uploadTestOutputs(ctx.artifactWriter, ctx.runId, testResults);
|
|
150
|
+
if (artifactRef) {
|
|
151
|
+
state.artifactRefs = {
|
|
152
|
+
...state.artifactRefs,
|
|
153
|
+
testOutputs: artifactRef,
|
|
154
|
+
};
|
|
155
155
|
}
|
|
156
156
|
}
|
|
157
157
|
const criticalSuffix = belowCritical.length > 0
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
* @see packages/eval/src/pipeline/callback-delivery.ts
|
|
12
12
|
* @see docs/design-docs/api-service-gateway.md
|
|
13
13
|
*/
|
|
14
|
-
import type
|
|
14
|
+
import { type AppContext, type PipelineState, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
15
15
|
import { type CallbackConfig } from "../../pipeline/callback-delivery.js";
|
|
16
16
|
export declare class CallbackStep implements PipelineStep {
|
|
17
17
|
private readonly callback;
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
*/
|
|
14
14
|
import { readFileSync } from "fs";
|
|
15
15
|
import { resolve } from "path";
|
|
16
|
+
import { assoc, } from "../../_vendor/ailf-core/index.js";
|
|
16
17
|
import { deliverCallback, } from "../../pipeline/callback-delivery.js";
|
|
17
18
|
export class CallbackStep {
|
|
18
19
|
callback;
|
|
@@ -58,11 +59,12 @@ export class CallbackStep {
|
|
|
58
59
|
reportId: state.reportId,
|
|
59
60
|
summary,
|
|
60
61
|
};
|
|
61
|
-
//
|
|
62
|
-
|
|
62
|
+
// W0050 — callbackRequest/callbackResponse are per-entry artifacts
|
|
63
|
+
// keyed by the callback target URL (the `name` slot on the association).
|
|
64
|
+
const callbackName = this.callback.url;
|
|
65
|
+
await ctx.artifactWriter.emit("callbackRequest", assoc(ctx, { name: callbackName }), callbackPayload);
|
|
63
66
|
const result = await deliverCallback(this.callback, callbackPayload);
|
|
64
|
-
|
|
65
|
-
ctx.collector.capture("callback", "callback-response", {
|
|
67
|
+
await ctx.artifactWriter.emit("callbackResponse", assoc(ctx, { name: callbackName }), {
|
|
66
68
|
ok: result.ok,
|
|
67
69
|
attempts: result.attempts,
|
|
68
70
|
error: result.error,
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* inlined directly from the former pipeline/steps/compare-step.ts.
|
|
6
6
|
* This is an optional step — failure doesn't stop the pipeline.
|
|
7
7
|
*/
|
|
8
|
-
import type
|
|
8
|
+
import { type AppContext, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
9
9
|
export declare class CompareStep implements PipelineStep {
|
|
10
10
|
readonly name = "compare";
|
|
11
11
|
readonly optional = true;
|
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
*/
|
|
8
8
|
import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync, } from "fs";
|
|
9
9
|
import { join, resolve } from "path";
|
|
10
|
+
import { assoc, } from "../../_vendor/ailf-core/index.js";
|
|
11
|
+
import { emitFileContents } from "../../artifact-capture/emit-file.js";
|
|
10
12
|
import { compare } from "../../pipeline/compare.js";
|
|
11
13
|
export class CompareStep {
|
|
12
14
|
name = "compare";
|
|
@@ -69,8 +71,8 @@ export class CompareStep {
|
|
|
69
71
|
mkdirSync(ctx.config.outputDir, { recursive: true });
|
|
70
72
|
const reportPath = resolve(ctx.config.outputDir, "comparison-report.json");
|
|
71
73
|
writeFileSync(reportPath, JSON.stringify(report, null, 2));
|
|
72
|
-
//
|
|
73
|
-
ctx.
|
|
74
|
+
// W0050 — comparisonReport is per-entry keyed by mode ({run, mode}).
|
|
75
|
+
await emitFileContents(ctx.artifactWriter, "comparisonReport", assoc(ctx, { mode: ctx.config.mode }), reportPath);
|
|
74
76
|
// Build summary
|
|
75
77
|
const improved = report.improved.length;
|
|
76
78
|
const regressed = report.regressed.length;
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Calls pure functions from pipeline/discovery-report.ts directly.
|
|
5
5
|
* Optional step — failure doesn't stop the pipeline.
|
|
6
6
|
*/
|
|
7
|
-
import type
|
|
7
|
+
import { type AppContext, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
8
8
|
export declare class DiscoveryReportStep implements PipelineStep {
|
|
9
9
|
readonly name = "discovery-report";
|
|
10
10
|
readonly optional = true;
|
|
@@ -6,6 +6,8 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
8
8
|
import { resolve } from "path";
|
|
9
|
+
import { assoc, } from "../../_vendor/ailf-core/index.js";
|
|
10
|
+
import { emitFileContents } from "../../artifact-capture/emit-file.js";
|
|
9
11
|
import { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../../pipeline/discovery-report.js";
|
|
10
12
|
export class DiscoveryReportStep {
|
|
11
13
|
name = "discovery-report";
|
|
@@ -38,7 +40,8 @@ export class DiscoveryReportStep {
|
|
|
38
40
|
mkdirSync(ctx.config.outputDir, { recursive: true });
|
|
39
41
|
const discoveryPath = resolve(ctx.config.outputDir, "discovery-report.md");
|
|
40
42
|
writeFileSync(discoveryPath, md);
|
|
41
|
-
|
|
43
|
+
// W0050 — discoveryReport is per-entry keyed by mode.
|
|
44
|
+
await emitFileContents(ctx.artifactWriter, "discoveryReport", assoc(ctx, { mode: ctx.config.mode }), discoveryPath);
|
|
42
45
|
console.log(md);
|
|
43
46
|
const invisible = report.invisibleDocs.length;
|
|
44
47
|
const f1 = report.overall.avgF1.toFixed(2);
|
|
@@ -12,7 +12,8 @@
|
|
|
12
12
|
*/
|
|
13
13
|
import { existsSync, mkdirSync, writeFileSync } from "fs";
|
|
14
14
|
import { join } from "path";
|
|
15
|
-
import { isIdRef, isPathRef, isSlugRef, } from "../../_vendor/ailf-core/index.js";
|
|
15
|
+
import { assoc, isIdRef, isPathRef, isSlugRef, } from "../../_vendor/ailf-core/index.js";
|
|
16
|
+
import { emitFileContents } from "../../artifact-capture/emit-file.js";
|
|
16
17
|
import { getStepInputPaths } from "../../pipeline/cache.js";
|
|
17
18
|
import { buildCacheContext } from "../cache-context.js";
|
|
18
19
|
import { checkCanonicalContextsExist } from "../../pipeline/checks.js";
|
|
@@ -94,20 +95,13 @@ export class FetchDocsStep {
|
|
|
94
95
|
if (result.metadata) {
|
|
95
96
|
writeMetadataFiles(ctx.config.rootDir, result.metadata);
|
|
96
97
|
}
|
|
97
|
-
//
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
["url-fetch", "url-fetch.json"],
|
|
105
|
-
]) {
|
|
106
|
-
const filePath = join(contextsDir, filename);
|
|
107
|
-
if (existsSync(filePath)) {
|
|
108
|
-
ctx.collector.captureFile("fetch-docs", type, filePath);
|
|
109
|
-
}
|
|
110
|
-
}
|
|
98
|
+
// W0050 — documentManifest is run-scoped bulk JSON. The
|
|
99
|
+
// release-impact/document-overlay/url-fetch captures had no
|
|
100
|
+
// registered descriptors (they were extras-only); dropped per Q3
|
|
101
|
+
// ("producers always call emit; registered types only").
|
|
102
|
+
const documentManifestPath = join(ctx.config.rootDir, "contexts", "document-manifest.json");
|
|
103
|
+
if (existsSync(documentManifestPath)) {
|
|
104
|
+
await emitFileContents(ctx.artifactWriter, "documentManifest", assoc(ctx), documentManifestPath);
|
|
111
105
|
}
|
|
112
106
|
}
|
|
113
107
|
catch (err) {
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
*/
|
|
18
18
|
import { existsSync, readFileSync } from "node:fs";
|
|
19
19
|
import { resolve } from "node:path";
|
|
20
|
+
import { AccumulatingArtifactWriter } from "../../artifact-capture/accumulating-artifact-writer.js";
|
|
20
21
|
import { buildRunContext } from "../../pipeline/run-context.js";
|
|
21
22
|
import { loadSource } from "../../sources.js";
|
|
22
23
|
import { configToSourceOverrides } from "../config-to-source-overrides.js";
|
|
@@ -34,12 +35,11 @@ export class FinalizeRunStep {
|
|
|
34
35
|
}
|
|
35
36
|
async execute(ctx, state) {
|
|
36
37
|
const start = Date.now();
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
}
|
|
38
|
+
// W0050 — `ctx.artifactWriter` is now required on AppContext
|
|
39
|
+
// (composition root always provides one; NoOpArtifactWriter when
|
|
40
|
+
// `--no-artifacts`). The pre-W0050 guard that returned "skipped" has
|
|
41
|
+
// been removed — a NoOp writer's writeManifest returns null and the
|
|
42
|
+
// code below already handles that as a non-blocking failure.
|
|
43
43
|
// Resolve the source (same input buildProvenance uses).
|
|
44
44
|
const overrides = configToSourceOverrides(ctx.config);
|
|
45
45
|
const resolvedSource = loadSource(ctx.config.source, overrides);
|
|
@@ -57,6 +57,20 @@ export class FinalizeRunStep {
|
|
|
57
57
|
source: resolvedSource,
|
|
58
58
|
taskIds: ctx.config.tasks,
|
|
59
59
|
});
|
|
60
|
+
// W0051 revisit: the composition-root wraps `ctx.artifactWriter` in
|
|
61
|
+
// `AccumulatingArtifactWriter`, which keeps a map of every ref any
|
|
62
|
+
// producer emitted this run. Merge that into `state.artifactRefs` so
|
|
63
|
+
// the manifest reflects the FULL set — not just the subset producers
|
|
64
|
+
// happened to register manually. When the writer is a NoOp / plain
|
|
65
|
+
// decorator without accumulation, `aggregated` stays empty and the
|
|
66
|
+
// manifest falls back to the producer-side registration.
|
|
67
|
+
const aggregated = ctx.artifactWriter instanceof AccumulatingArtifactWriter
|
|
68
|
+
? ctx.artifactWriter.getAccumulatedArtifactRefs()
|
|
69
|
+
: {};
|
|
70
|
+
const artifacts = {
|
|
71
|
+
...aggregated,
|
|
72
|
+
...(state.artifactRefs ?? {}),
|
|
73
|
+
};
|
|
60
74
|
const manifest = {
|
|
61
75
|
version: 1,
|
|
62
76
|
runId: ctx.runId,
|
|
@@ -68,7 +82,7 @@ export class FinalizeRunStep {
|
|
|
68
82
|
? { testSummary: state.testSummary }
|
|
69
83
|
: undefined,
|
|
70
84
|
promptfooUrls: state.promptfooUrls,
|
|
71
|
-
artifacts
|
|
85
|
+
artifacts,
|
|
72
86
|
};
|
|
73
87
|
const ref = await ctx.artifactWriter.writeManifest(ctx.runId, manifest);
|
|
74
88
|
if (!ref) {
|
|
@@ -16,7 +16,8 @@
|
|
|
16
16
|
*/
|
|
17
17
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
18
18
|
import { join, resolve } from "path";
|
|
19
|
-
import { isSlugRef } from "../../_vendor/ailf-core/index.js";
|
|
19
|
+
import { assoc, isSlugRef } from "../../_vendor/ailf-core/index.js";
|
|
20
|
+
import { emitFileContents } from "../../artifact-capture/emit-file.js";
|
|
20
21
|
export class GapAnalysisStep {
|
|
21
22
|
name = "gap-analysis";
|
|
22
23
|
optional = true;
|
|
@@ -194,14 +195,29 @@ export class GapAnalysisStep {
|
|
|
194
195
|
...(testResults !== undefined && { testResults }),
|
|
195
196
|
};
|
|
196
197
|
writeFileSync(scoreSummaryPath, JSON.stringify(enrichedSummary, null, 2));
|
|
197
|
-
//
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
198
|
+
// W0051 Slice 2 — failureModes is per-entry keyed by {mode, category};
|
|
199
|
+
// one entry per classified FailureModeType. Zero-count categories are
|
|
200
|
+
// skipped to keep the manifest honest about what the run surfaced.
|
|
201
|
+
const classifiedByCategory = new Map();
|
|
202
|
+
for (const cj of failureModeReport.classifiedJudgments) {
|
|
203
|
+
const cat = cj.classification.mode;
|
|
204
|
+
const bucket = classifiedByCategory.get(cat) ?? [];
|
|
205
|
+
bucket.push(cj);
|
|
206
|
+
classifiedByCategory.set(cat, bucket);
|
|
207
|
+
}
|
|
208
|
+
for (const [category, classified] of classifiedByCategory) {
|
|
209
|
+
if (classified.length === 0)
|
|
210
|
+
continue;
|
|
211
|
+
await ctx.artifactWriter.emit("failureModes", assoc(ctx, { mode: ctx.config.mode, category }), {
|
|
212
|
+
category,
|
|
213
|
+
count: classified.length,
|
|
214
|
+
title: toTitleCase(category),
|
|
215
|
+
judgments: classified.map((c) => c.judgment),
|
|
216
|
+
});
|
|
201
217
|
}
|
|
202
218
|
const gapReportPath = join(outDir, "gap-analysis.json");
|
|
203
219
|
if (existsSync(gapReportPath)) {
|
|
204
|
-
ctx.
|
|
220
|
+
await emitFileContents(ctx.artifactWriter, "gapReport", assoc(ctx), gapReportPath);
|
|
205
221
|
}
|
|
206
222
|
const gapCount = gapReport.gaps.length;
|
|
207
223
|
const classRate = failureModeReport.classificationRate.toFixed(0);
|
|
@@ -223,6 +239,18 @@ export class GapAnalysisStep {
|
|
|
223
239
|
// ---------------------------------------------------------------------------
|
|
224
240
|
// Helpers
|
|
225
241
|
// ---------------------------------------------------------------------------
|
|
242
|
+
/**
|
|
243
|
+
* Render a kebab-case FailureModeType id as Title Case for the manifest
|
|
244
|
+
* entry's display title (e.g. `"missing-docs"` → `"Missing Docs"`). Kept
|
|
245
|
+
* local to the producer so the registry descriptor stays decoupled from
|
|
246
|
+
* eval-side types.
|
|
247
|
+
*/
|
|
248
|
+
function toTitleCase(id) {
|
|
249
|
+
return id
|
|
250
|
+
.split("-")
|
|
251
|
+
.map((w) => (w.length === 0 ? w : w[0].toUpperCase() + w.slice(1)))
|
|
252
|
+
.join(" ");
|
|
253
|
+
}
|
|
226
254
|
/**
|
|
227
255
|
* Extract slug strings from polymorphic canonical doc refs.
|
|
228
256
|
*
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
* When the variant is "full", the handler is called twice (baseline + agentic)
|
|
9
9
|
* and three YAML files are written. Other modes produce one YAML file.
|
|
10
10
|
*/
|
|
11
|
-
import type
|
|
11
|
+
import { type AppContext, type PipelineState, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
12
12
|
export declare class GenerateConfigsStep implements PipelineStep {
|
|
13
13
|
readonly name = "generate-configs";
|
|
14
14
|
/** Task IDs from the last loadTasks call (pre-filter), for error messages. */
|
|
@@ -10,6 +10,8 @@
|
|
|
10
10
|
*/
|
|
11
11
|
import { existsSync } from "node:fs";
|
|
12
12
|
import { resolve } from "node:path";
|
|
13
|
+
import { assoc, } from "../../_vendor/ailf-core/index.js";
|
|
14
|
+
import { emitFileContents } from "../../artifact-capture/emit-file.js";
|
|
13
15
|
import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
|
|
14
16
|
import { modelMatchesLiteracyVariant } from "../../pipeline/compiler/mode-bases/literacy.js";
|
|
15
17
|
import { getStepInputPaths } from "../../pipeline/cache.js";
|
|
@@ -136,12 +138,14 @@ export class GenerateConfigsStep {
|
|
|
136
138
|
maxConcurrency: models.maxConcurrency,
|
|
137
139
|
logger: ctx.logger,
|
|
138
140
|
});
|
|
139
|
-
//
|
|
141
|
+
// W0050 — configSnapshot is per-entry keyed by mode. For literacy,
|
|
142
|
+
// each variant produces a distinct config, so the variant name is the
|
|
143
|
+
// mode-axis value here.
|
|
140
144
|
const { configFileForMode } = await import("../../pipeline/eval-constants.js");
|
|
141
145
|
for (const variant of ["baseline", "agentic", "observed"]) {
|
|
142
146
|
const configPath = resolve(ctx.config.rootDir, configFileForMode(variant));
|
|
143
147
|
if (existsSync(configPath)) {
|
|
144
|
-
ctx.
|
|
148
|
+
await emitFileContents(ctx.artifactWriter, "configSnapshot", assoc(ctx, { mode: `literacy-${variant}` }), configPath);
|
|
145
149
|
}
|
|
146
150
|
}
|
|
147
151
|
return this.checkLiteracyPostconditions(ctx, start);
|
|
@@ -187,18 +191,14 @@ export class GenerateConfigsStep {
|
|
|
187
191
|
maxConcurrency: models.maxConcurrency,
|
|
188
192
|
logger: ctx.logger,
|
|
189
193
|
});
|
|
190
|
-
//
|
|
194
|
+
// W0050 — configSnapshot for a single-mode compile.
|
|
191
195
|
const configPath = resolve(ctx.config.rootDir, `promptfooconfig.${mode}.yaml`);
|
|
192
196
|
if (existsSync(configPath)) {
|
|
193
|
-
ctx.
|
|
194
|
-
}
|
|
195
|
-
// Capture mode-specific test artifacts (extras)
|
|
196
|
-
if (ctx.collector.extrasEnabled) {
|
|
197
|
-
const testsPath = resolve(ctx.config.rootDir, "results", "latest", `${mode}-tests.json`);
|
|
198
|
-
if (existsSync(testsPath)) {
|
|
199
|
-
ctx.collector.captureFile("generate-configs", `${mode}-tests`, testsPath, { mode });
|
|
200
|
-
}
|
|
197
|
+
await emitFileContents(ctx.artifactWriter, "configSnapshot", assoc(ctx, { mode }), configPath);
|
|
201
198
|
}
|
|
199
|
+
// W0050 — the mode-specific `${mode}-tests.json` file was an
|
|
200
|
+
// extras-only capture with no registered descriptor. Dropped; the
|
|
201
|
+
// same information lives in the configSnapshot + rawResults chain.
|
|
202
202
|
return {
|
|
203
203
|
durationMs: Date.now() - start,
|
|
204
204
|
status: "success",
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* - P5: Local-first (pipeline never fails because of a store write)
|
|
11
11
|
* - P6: Sinks are fire-and-forget (failures logged, not thrown)
|
|
12
12
|
*/
|
|
13
|
-
import type
|
|
13
|
+
import { type AppContext, type PipelineState, type PipelineStep, type PromptfooUrlEntry, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
14
14
|
export declare class PublishReportStep implements PipelineStep {
|
|
15
15
|
private readonly pipelineStart;
|
|
16
16
|
private readonly options;
|
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
*/
|
|
13
13
|
import { readFileSync } from "fs";
|
|
14
14
|
import { resolve } from "path";
|
|
15
|
+
import { assoc, buildSlimReportSummary, } from "../../_vendor/ailf-core/index.js";
|
|
15
16
|
import { checkScoreSummaryValid } from "../../pipeline/checks.js";
|
|
16
17
|
import { buildProvenance, } from "../../pipeline/provenance.js";
|
|
17
18
|
import { generateReportTitle } from "../../pipeline/report-title.js";
|
|
@@ -103,13 +104,19 @@ export class PublishReportStep {
|
|
|
103
104
|
};
|
|
104
105
|
}
|
|
105
106
|
const title = generateReportTitle({ provenance });
|
|
107
|
+
// W0051 Slice 3: transform the full pipeline-internal ScoreSummary into
|
|
108
|
+
// the slim ReportSummary that lives on the Content Lake document.
|
|
109
|
+
// Prose fields (grader reasons, failureModes full text, gap prose,
|
|
110
|
+
// agentBehavior arrays) point at their external artifacts via
|
|
111
|
+
// `id = manifestEntryKey`; Studio hydrates on drill-down.
|
|
112
|
+
const slimSummary = buildSlimReportSummary(summary, ctx.config.mode);
|
|
106
113
|
const report = {
|
|
107
114
|
comparison: comparison ?? undefined,
|
|
108
115
|
completedAt: now,
|
|
109
116
|
durationMs,
|
|
110
117
|
id: reportId,
|
|
111
118
|
provenance,
|
|
112
|
-
summary,
|
|
119
|
+
summary: slimSummary,
|
|
113
120
|
tag: this.options.publishTag ?? ctx.config.publishTag,
|
|
114
121
|
title,
|
|
115
122
|
};
|
|
@@ -126,19 +133,20 @@ export class PublishReportStep {
|
|
|
126
133
|
// full output lives in the GCS artifact. When no testOutputs artifact
|
|
127
134
|
// exists, leave the inline shape intact so Studio's drill-down UI
|
|
128
135
|
// falls back to it.
|
|
129
|
-
if (artifactManifest?.testOutputs &&
|
|
136
|
+
if (artifactManifest?.testOutputs && slimSummary.testResults?.length) {
|
|
130
137
|
report.summary = {
|
|
131
|
-
...
|
|
132
|
-
testResults:
|
|
138
|
+
...slimSummary,
|
|
139
|
+
testResults: slimSummary.testResults.map(slimTestResult),
|
|
133
140
|
};
|
|
134
141
|
}
|
|
135
142
|
// Share reportId with downstream steps (CallbackStep + orchestrator job update)
|
|
136
143
|
state.reportId = reportId;
|
|
137
|
-
//
|
|
138
|
-
|
|
139
|
-
|
|
144
|
+
// W0050 — migrated from ctx.collector.capture to the unified writer.
|
|
145
|
+
// reportSnapshot: full Report JSON for replay (run-scoped, bulk).
|
|
146
|
+
await ctx.artifactWriter.emit("reportSnapshot", assoc(ctx), report);
|
|
147
|
+
// autoComparison: delta vs baseline (run-scoped, bulk, optional).
|
|
140
148
|
if (comparison) {
|
|
141
|
-
ctx.
|
|
149
|
+
await ctx.artifactWriter.emit("autoComparison", assoc(ctx), comparison);
|
|
142
150
|
}
|
|
143
151
|
// Write to store (system of record — best-effort, P5)
|
|
144
152
|
const sanityResult = ctx.reportStore
|
|
@@ -146,17 +154,14 @@ export class PublishReportStep {
|
|
|
146
154
|
: null;
|
|
147
155
|
// Run sinks (fire-and-forget, P6)
|
|
148
156
|
const publishResult = await runSinks(report, ctx);
|
|
149
|
-
//
|
|
150
|
-
|
|
151
|
-
ctx.
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
...(r.result.status === "failed" ? { error: r.result.error } : {}),
|
|
158
|
-
...(r.result.status === "skipped" ? { reason: r.result.reason } : {}),
|
|
159
|
-
})),
|
|
157
|
+
// sinkResults: per-sink outcome (run-scoped, per-entry keyed by sink name).
|
|
158
|
+
for (const r of publishResult.sinkResults) {
|
|
159
|
+
await ctx.artifactWriter.emit("sinkResults", assoc(ctx, { name: r.name }), {
|
|
160
|
+
name: r.name,
|
|
161
|
+
status: r.result.status,
|
|
162
|
+
...(r.result.status === "success" ? { detail: r.result.detail } : {}),
|
|
163
|
+
...(r.result.status === "failed" ? { error: r.result.error } : {}),
|
|
164
|
+
...(r.result.status === "skipped" ? { reason: r.result.reason } : {}),
|
|
160
165
|
});
|
|
161
166
|
}
|
|
162
167
|
// Build result summary
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Calls pure functions from pipeline/readiness-report.ts directly.
|
|
5
5
|
* Optional step — failure doesn't stop the pipeline.
|
|
6
6
|
*/
|
|
7
|
-
import type
|
|
7
|
+
import { type AppContext, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
8
8
|
export declare class ReadinessStep implements PipelineStep {
|
|
9
9
|
readonly name = "readiness";
|
|
10
10
|
readonly optional = true;
|
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
8
8
|
import { resolve } from "path";
|
|
9
9
|
import { tryLoadConfigFile } from "../../pipeline/compiler/config-loader.js";
|
|
10
|
+
import { assoc, } from "../../_vendor/ailf-core/index.js";
|
|
11
|
+
import { emitFileContents } from "../../artifact-capture/emit-file.js";
|
|
10
12
|
import { formatReadinessMarkdown, generateReadinessReport, } from "../../pipeline/readiness-report.js";
|
|
11
13
|
import { ThresholdConfigSchema } from "../../pipeline/schemas.js";
|
|
12
14
|
export class ReadinessStep {
|
|
@@ -65,7 +67,8 @@ export class ReadinessStep {
|
|
|
65
67
|
mkdirSync(ctx.config.outputDir, { recursive: true });
|
|
66
68
|
const readinessPath = resolve(ctx.config.outputDir, "readiness-report.md");
|
|
67
69
|
writeFileSync(readinessPath, readinessLines.join("\n---\n\n"));
|
|
68
|
-
|
|
70
|
+
// W0050 — readinessReport is run-scoped bulk markdown.
|
|
71
|
+
await emitFileContents(ctx.artifactWriter, "readinessReport", assoc(ctx), readinessPath);
|
|
69
72
|
}
|
|
70
73
|
const passCount = readinessAreas.filter((area) => {
|
|
71
74
|
const areaScore = scoreSummary.scores.find((s) => s.feature === area);
|