@sanity/ailf 2.9.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/artifact-capture/association.d.ts +37 -0
- package/dist/_vendor/ailf-core/artifact-capture/association.js +19 -0
- package/dist/_vendor/ailf-core/artifact-registry.d.ts +1 -1
- package/dist/_vendor/ailf-core/artifact-registry.js +1 -18
- package/dist/_vendor/ailf-core/batch-signing.d.ts +64 -0
- package/dist/_vendor/ailf-core/batch-signing.js +23 -0
- package/dist/_vendor/ailf-core/index.d.ts +2 -2
- package/dist/_vendor/ailf-core/index.js +2 -2
- package/dist/_vendor/ailf-core/ports/context.d.ts +12 -20
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -2
- package/dist/_vendor/ailf-core/ports/index.js +1 -0
- package/dist/_vendor/ailf-core/ports/progress-reporter.d.ts +74 -0
- package/dist/_vendor/ailf-core/ports/progress-reporter.js +26 -0
- package/dist/_vendor/ailf-core/services/slim-report-summary.js +1 -16
- package/dist/adapters/config-sources/file-config-adapter.js +0 -4
- package/dist/adapters/progress/console-progress-reporter.d.ts +35 -0
- package/dist/adapters/progress/console-progress-reporter.js +110 -0
- package/dist/artifact-capture/api-gateway-artifact-writer.d.ts +8 -1
- package/dist/artifact-capture/api-gateway-artifact-writer.js +79 -42
- package/dist/artifact-capture/batching-api-gateway-artifact-writer.d.ts +108 -0
- package/dist/artifact-capture/batching-api-gateway-artifact-writer.js +492 -0
- package/dist/artifact-capture/fanout-artifact-writer.d.ts +14 -2
- package/dist/artifact-capture/fanout-artifact-writer.js +25 -4
- package/dist/artifact-capture/gcs-artifact-writer.d.ts +27 -1
- package/dist/artifact-capture/gcs-artifact-writer.js +168 -38
- package/dist/artifact-capture/instrumented-artifact-writer.d.ts +32 -0
- package/dist/artifact-capture/instrumented-artifact-writer.js +151 -0
- package/dist/artifact-capture/local-fs-artifact-writer.d.ts +8 -1
- package/dist/artifact-capture/local-fs-artifact-writer.js +23 -4
- package/dist/artifact-capture/parallel-emit.d.ts +43 -0
- package/dist/artifact-capture/parallel-emit.js +84 -0
- package/dist/artifact-capture/redact-artifact.d.ts +3 -5
- package/dist/artifact-capture/redact-artifact.js +3 -5
- package/dist/artifact-capture/upload-metrics.d.ts +62 -0
- package/dist/artifact-capture/upload-metrics.js +125 -0
- package/dist/cli.js +56 -2
- package/dist/commands/explain-handler.js +1 -5
- package/dist/commands/pipeline-action.d.ts +0 -4
- package/dist/commands/pipeline-action.js +11 -45
- package/dist/commands/pipeline.d.ts +1 -5
- package/dist/commands/pipeline.js +1 -5
- package/dist/commands/runs.d.ts +18 -0
- package/dist/commands/runs.js +71 -0
- package/dist/composition-root.d.ts +2 -2
- package/dist/composition-root.js +98 -38
- package/dist/orchestration/build-app-context.js +4 -7
- package/dist/orchestration/pipeline-orchestrator.js +100 -24
- package/dist/orchestration/steps/calculate-scores-step.js +1 -1
- package/dist/orchestration/steps/finalize-run-step.js +33 -2
- package/dist/pipeline/emit-eval-results.js +29 -11
- package/dist/pipeline/map-request-to-config.js +0 -4
- package/dist/pipeline/upload-test-outputs.d.ts +12 -5
- package/dist/pipeline/upload-test-outputs.js +27 -10
- package/package.json +3 -3
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +0 -14
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +0 -25
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +0 -94
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +0 -13
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +0 -138
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +0 -10
- package/dist/artifact-capture/comparator.d.ts +0 -22
- package/dist/artifact-capture/comparator.js +0 -493
- package/dist/artifact-capture/filesystem-collector.d.ts +0 -60
- package/dist/artifact-capture/filesystem-collector.js +0 -262
- package/dist/artifact-capture/gcs-collector.d.ts +0 -55
- package/dist/artifact-capture/gcs-collector.js +0 -117
- package/dist/commands/capture-compare.d.ts +0 -15
- package/dist/commands/capture-compare.js +0 -253
- package/dist/commands/capture-list.d.ts +0 -12
- package/dist/commands/capture-list.js +0 -150
- package/dist/commands/capture.d.ts +0 -9
- package/dist/commands/capture.js +0 -16
package/dist/composition-root.js
CHANGED
|
@@ -15,20 +15,22 @@
|
|
|
15
15
|
* @see packages/core/src/ports/context.ts — AppContext interface
|
|
16
16
|
* @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
|
|
17
17
|
*/
|
|
18
|
-
import {
|
|
19
|
-
import { InMemoryPluginRegistry, NoOpArtifactCollector, NoOpArtifactWriter, generateRunId, isArtifactType, } from "./_vendor/ailf-core/index.js";
|
|
18
|
+
import { ARTIFACT_EXPORT_PHASE_ID, InMemoryPluginRegistry, NoOpArtifactWriter, NoOpProgressReporter, generateRunId, isArtifactType, } from "./_vendor/ailf-core/index.js";
|
|
20
19
|
import { AccumulatingArtifactWriter } from "./artifact-capture/accumulating-artifact-writer.js";
|
|
21
20
|
import { ApiGatewayArtifactWriter } from "./artifact-capture/api-gateway-artifact-writer.js";
|
|
21
|
+
import { BatchingApiGatewayArtifactWriter } from "./artifact-capture/batching-api-gateway-artifact-writer.js";
|
|
22
22
|
import { FanoutArtifactWriter } from "./artifact-capture/fanout-artifact-writer.js";
|
|
23
|
-
import { FilesystemArtifactCollector } from "./artifact-capture/filesystem-collector.js";
|
|
24
|
-
import { GcsArtifactCollector } from "./artifact-capture/gcs-collector.js";
|
|
25
23
|
import { GcsArtifactWriter } from "./artifact-capture/gcs-artifact-writer.js";
|
|
24
|
+
import { InstrumentedArtifactWriter } from "./artifact-capture/instrumented-artifact-writer.js";
|
|
26
25
|
import { LocalFilesystemArtifactWriter } from "./artifact-capture/local-fs-artifact-writer.js";
|
|
26
|
+
import { resolveUploadConcurrency, setDefaultUploadConcurrency, } from "./artifact-capture/parallel-emit.js";
|
|
27
|
+
import { UploadMetrics } from "./artifact-capture/upload-metrics.js";
|
|
27
28
|
import { ContentLakeCacheAdapter } from "./adapters/cache/content-lake-cache.js";
|
|
28
29
|
import { loadExternalPresets } from "./pipeline/compiler/preset-loader.js";
|
|
29
30
|
import { FilesystemCache } from "./adapters/cache/filesystem-cache.js";
|
|
30
31
|
import { PromptfooEvalAdapter } from "./adapters/eval-runners/promptfoo-eval-adapter.js";
|
|
31
32
|
import { ConsoleLogger, JsonLogger, QuietLogger, } from "./adapters/loggers/index.js";
|
|
33
|
+
import { ConsoleProgressReporter } from "./adapters/progress/console-progress-reporter.js";
|
|
32
34
|
import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource, } from "./adapters/task-sources/index.js";
|
|
33
35
|
import { createAgentHarnessBase, createKnowledgeProbeBase, createLiteracyModeBase, createMcpServerModeBase, } from "./pipeline/compiler/mode-bases/index.js";
|
|
34
36
|
import { createSanityLiteracyPreset } from "./pipeline/compiler/presets/index.js";
|
|
@@ -44,6 +46,9 @@ import { loadSinks } from "./sinks/index.js";
|
|
|
44
46
|
export function createAppContext(config) {
|
|
45
47
|
// Logger — selected by env var preferences
|
|
46
48
|
const logger = createLogger();
|
|
49
|
+
// Progress reporter — console-backed for the default logger; no-op for
|
|
50
|
+
// JSON/quiet modes and tests where interactive output is inappropriate.
|
|
51
|
+
const progress = createProgressReporter();
|
|
47
52
|
// Cache — filesystem, optionally decorated with Content Lake fallback
|
|
48
53
|
const cache = config.noCache ? undefined : createCache(config);
|
|
49
54
|
// Task source — selected by config.taskSourceType
|
|
@@ -63,33 +68,16 @@ export function createAppContext(config) {
|
|
|
63
68
|
const reportStore = createReportStore(config);
|
|
64
69
|
// Sinks — loaded from config/sinks
|
|
65
70
|
const sinks = loadSinks();
|
|
66
|
-
// Artifact collector — no-op by default, filesystem when --capture is set,
|
|
67
|
-
// GCS decorator when --capture-gcs-bucket is also provided (D0030/W0035)
|
|
68
|
-
let collector = new NoOpArtifactCollector();
|
|
69
|
-
if (config.captureEnabled) {
|
|
70
|
-
const fsCollector = new FilesystemArtifactCollector({
|
|
71
|
-
captureDir: config.captureDir ?? join(config.outputDir, "..", "captures"),
|
|
72
|
-
mode: config.mode,
|
|
73
|
-
compress: config.captureCompress ?? true,
|
|
74
|
-
extras: config.captureExtras ?? true,
|
|
75
|
-
pipeline: {
|
|
76
|
-
variant: config.variant,
|
|
77
|
-
source: config.source,
|
|
78
|
-
areas: config.areas,
|
|
79
|
-
},
|
|
80
|
-
});
|
|
81
|
-
collector = config.captureGcsBucket
|
|
82
|
-
? new GcsArtifactCollector(fsCollector, {
|
|
83
|
-
bucket: config.captureGcsBucket,
|
|
84
|
-
prefix: config.captureGcsPrefix,
|
|
85
|
-
})
|
|
86
|
-
: fsCollector;
|
|
87
|
-
}
|
|
88
71
|
// Artifact writer — writes run artifacts + manifest to GCS at known
|
|
89
72
|
// `runs/{runId}/…` paths (D0032). Auto-detects the right adapter from
|
|
90
73
|
// available credentials; defaults bucket to "ailf-artifacts". Set
|
|
91
74
|
// artifactUpload: false to opt out entirely.
|
|
92
|
-
|
|
75
|
+
// W0053 — writers receive a progress reporter scoped to a single
|
|
76
|
+
// `artifact-export` phase so the CLI can render per-batch updates.
|
|
77
|
+
const artifactWriter = createArtifactWriter(config, logger, {
|
|
78
|
+
reporter: progress,
|
|
79
|
+
phaseId: ARTIFACT_EXPORT_PHASE_ID,
|
|
80
|
+
});
|
|
93
81
|
// Generate the pipeline's RunId once; every downstream step reads it
|
|
94
82
|
// from the context (D0032).
|
|
95
83
|
const runId = generateRunId();
|
|
@@ -97,11 +85,11 @@ export function createAppContext(config) {
|
|
|
97
85
|
return {
|
|
98
86
|
artifactWriter,
|
|
99
87
|
cache,
|
|
100
|
-
collector,
|
|
101
88
|
config,
|
|
102
89
|
docFetcher,
|
|
103
90
|
evalRunner,
|
|
104
91
|
logger,
|
|
92
|
+
progress,
|
|
105
93
|
registry,
|
|
106
94
|
reportStore,
|
|
107
95
|
runId,
|
|
@@ -124,6 +112,23 @@ function createLogger() {
|
|
|
124
112
|
process.env.AILF_VERBOSE === "1",
|
|
125
113
|
});
|
|
126
114
|
}
|
|
115
|
+
/**
 * Pick the ProgressReporter adapter from the process environment.
 *
 * - `AILF_LOG_FORMAT=json`, `AILF_LOG_LEVEL=quiet` or `AILF_QUIET=1` select
 *   the no-op reporter, so machine-readable / quiet output stays clean.
 * - Anything else selects the console reporter; its `verbose` option is on
 *   when `AILF_LOG_LEVEL=verbose` or `AILF_VERBOSE=1`.
 *
 * @returns {NoOpProgressReporter | ConsoleProgressReporter} the selected adapter
 */
function createProgressReporter() {
    const { AILF_LOG_FORMAT, AILF_LOG_LEVEL, AILF_QUIET, AILF_VERBOSE } = process.env;
    const wantsJson = AILF_LOG_FORMAT === "json";
    const wantsQuiet = AILF_LOG_LEVEL === "quiet" || AILF_QUIET === "1";
    if (wantsJson || wantsQuiet) {
        return new NoOpProgressReporter();
    }
    const verbose = AILF_LOG_LEVEL === "verbose" || AILF_VERBOSE === "1";
    return new ConsoleProgressReporter({ verbose });
}
|
|
127
132
|
/**
|
|
128
133
|
* Shared GCS bucket for report artifacts. Matches the gateway default at
|
|
129
134
|
* packages/api/src/routes/artifacts.ts — both sides assume ailf-artifacts
|
|
@@ -155,7 +160,7 @@ const DEFAULT_LOCAL_ARTIFACTS_DIR = ".ailf/results/captures";
|
|
|
155
160
|
*
|
|
156
161
|
* Exported for unit-test access; not part of the public package API.
|
|
157
162
|
*/
|
|
158
|
-
export function createArtifactWriter(config, logger) {
|
|
163
|
+
export function createArtifactWriter(config, logger, progress) {
|
|
159
164
|
// Legacy `artifactUpload: false` still disables — treat as an alias for
|
|
160
165
|
// the canonical `artifactsDisabled: true` until W0052 removes it.
|
|
161
166
|
if (config.artifactsDisabled === true || config.artifactUpload === false) {
|
|
@@ -164,10 +169,27 @@ export function createArtifactWriter(config, logger) {
|
|
|
164
169
|
}
|
|
165
170
|
const exclude = resolveExcludeList(config.artifactsExclude, logger);
|
|
166
171
|
const rootDir = config.artifactsDir ?? DEFAULT_LOCAL_ARTIFACTS_DIR;
|
|
167
|
-
|
|
168
|
-
|
|
172
|
+
// W0056 — opt-in measurement of the upload path. The collector is passed
|
|
173
|
+
// to the remote writer (where sign/PUT/compose phases live) AND wraps the
|
|
174
|
+
// final writer to record caller-observed `emit`/`writeManifest` totals.
|
|
175
|
+
// `summarize()` fires from the decorator's `writeManifest` hook.
|
|
176
|
+
const metrics = process.env.AILF_UPLOAD_METRICS === "1"
|
|
177
|
+
? new UploadMetrics({
|
|
178
|
+
logger,
|
|
179
|
+
detailFile: `${rootDir}/upload-metrics/run-${Date.now()}.ndjson`,
|
|
180
|
+
})
|
|
181
|
+
: null;
|
|
182
|
+
// W0053: progress attaches to the OUTERMOST of (local-only | fanout). When
|
|
183
|
+
// fanout is wired, the delegates stay silent so we don't double-count the
|
|
184
|
+
// same caller-visible write across two backends.
|
|
185
|
+
const remote = createRemoteArtifactWriter(config, logger, metrics);
|
|
186
|
+
const local = new LocalFilesystemArtifactWriter({
|
|
187
|
+
rootDir,
|
|
188
|
+
exclude,
|
|
189
|
+
...(remote ? {} : { progress }),
|
|
190
|
+
});
|
|
169
191
|
const base = remote
|
|
170
|
-
? new FanoutArtifactWriter([local, remote])
|
|
192
|
+
? new FanoutArtifactWriter([local, remote], { progress })
|
|
171
193
|
: local;
|
|
172
194
|
if (!remote) {
|
|
173
195
|
logger.debug(`Artifact writer: LocalFilesystemArtifactWriter only (rootDir=${rootDir})`);
|
|
@@ -179,7 +201,10 @@ export function createArtifactWriter(config, logger) {
|
|
|
179
201
|
// RunManifest without each producer bookkeeping its own ArtifactRefs
|
|
180
202
|
// (W0051 Slice 3 revisit — Option B of the "manifest empty on real runs"
|
|
181
203
|
// fix).
|
|
182
|
-
|
|
204
|
+
const accumulating = new AccumulatingArtifactWriter(base);
|
|
205
|
+
return metrics
|
|
206
|
+
? new InstrumentedArtifactWriter(accumulating, metrics)
|
|
207
|
+
: accumulating;
|
|
183
208
|
}
|
|
184
209
|
/**
|
|
185
210
|
* Validate the exclude list against the registry. Unknown types are dropped
|
|
@@ -194,7 +219,7 @@ function resolveExcludeList(raw, logger) {
|
|
|
194
219
|
valid.push(name);
|
|
195
220
|
}
|
|
196
221
|
else {
|
|
197
|
-
logger.warn(`--
|
|
222
|
+
logger.warn(`--artifacts-exclude: "${name}" is not a known artifact type — ignored`);
|
|
198
223
|
}
|
|
199
224
|
}
|
|
200
225
|
return valid;
|
|
@@ -205,19 +230,54 @@ function resolveExcludeList(raw, logger) {
|
|
|
205
230
|
* the sole backend for that run, which is the D0033 M4 default for laptops
|
|
206
231
|
* and CI without GCS creds.
|
|
207
232
|
*/
|
|
208
|
-
function createRemoteArtifactWriter(config, logger) {
|
|
233
|
+
function createRemoteArtifactWriter(config, logger, metrics) {
|
|
209
234
|
const bucket = config.artifactGcsBucket ?? DEFAULT_ARTIFACT_BUCKET;
|
|
210
235
|
const hasGcsCredentials = Boolean(process.env.GOOGLE_APPLICATION_CREDENTIALS || process.env.GCLOUD_PROJECT);
|
|
211
236
|
if (hasGcsCredentials) {
|
|
212
|
-
|
|
213
|
-
|
|
237
|
+
// W0056 Phase 1: the GCS-direct path measured 0 failures at
|
|
238
|
+
// concurrency 8 with a 60 % pipeline-time reduction. Flip parallelism
|
|
239
|
+
// on by default on this path. `AILF_PARALLEL_UPLOAD=0` still forces
|
|
240
|
+
// serial as an escape hatch.
|
|
241
|
+
setDefaultUploadConcurrency(8);
|
|
242
|
+
logger.debug(`Artifact remote backend: GcsArtifactWriter (ADC, bucket=${bucket}, defaultConcurrency=8)`);
|
|
243
|
+
return new GcsArtifactWriter({
|
|
244
|
+
bucket,
|
|
245
|
+
...(metrics ? { metrics } : {}),
|
|
246
|
+
});
|
|
214
247
|
}
|
|
215
248
|
if (config.apiKey && config.apiUrl) {
|
|
216
|
-
|
|
249
|
+
// W0058 Phase 2: batching writer is the default on the API Gateway path.
|
|
250
|
+
// Prototype B (W0056) showed batch signing + client-side parallelism
|
|
251
|
+
// eliminates the 429 storm that single-URL parallelism triggered on the
|
|
252
|
+
// Vercel signing endpoint, at parity with the GCS-direct parallel path
|
|
253
|
+
// once the sign+PUT overlap optimization lands. Flip the default to 8
|
|
254
|
+
// concurrency; `AILF_PARALLEL_UPLOAD=0` forces serial as a rollback
|
|
255
|
+
// escape hatch and auto-selects the legacy single-URL writer.
|
|
256
|
+
setDefaultUploadConcurrency(8);
|
|
257
|
+
const concurrency = resolveUploadConcurrency();
|
|
258
|
+
if (concurrency > 1) {
|
|
259
|
+
logger.debug(`Artifact remote backend: BatchingApiGatewayArtifactWriter (via ${config.apiUrl}, bucket=${bucket}, putConcurrency=${concurrency})`);
|
|
260
|
+
// D0034: neither API Gateway writer supports NDJSON `appendNdjson`.
|
|
261
|
+
// Traces that flow through `appendNdjson` are dropped on this path.
|
|
262
|
+
// Surface the gap once at startup instead of ambushing users with a
|
|
263
|
+
// silent null ref at emit time.
|
|
264
|
+
logger.warn("Artifacts: API Gateway path selected without GCS ADC — " +
|
|
265
|
+
"trace (NDJSON) artifacts will be skipped (D0034). Set " +
|
|
266
|
+
"GOOGLE_APPLICATION_CREDENTIALS or GCLOUD_PROJECT to capture traces.");
|
|
267
|
+
return new BatchingApiGatewayArtifactWriter({
|
|
268
|
+
apiBaseUrl: config.apiUrl,
|
|
269
|
+
apiKey: config.apiKey,
|
|
270
|
+
bucket,
|
|
271
|
+
putConcurrency: concurrency,
|
|
272
|
+
...(metrics ? { metrics } : {}),
|
|
273
|
+
});
|
|
274
|
+
}
|
|
275
|
+
logger.debug(`Artifact remote backend: ApiGatewayArtifactWriter (via ${config.apiUrl}, bucket=${bucket}, serial — AILF_PARALLEL_UPLOAD=0 override)`);
|
|
217
276
|
return new ApiGatewayArtifactWriter({
|
|
218
277
|
apiBaseUrl: config.apiUrl,
|
|
219
278
|
apiKey: config.apiKey,
|
|
220
279
|
bucket,
|
|
280
|
+
...(metrics ? { metrics } : {}),
|
|
221
281
|
});
|
|
222
282
|
}
|
|
223
283
|
return null;
|
|
@@ -8,7 +8,6 @@
|
|
|
8
8
|
* Once all commands construct ResolvedConfig directly (or use --config),
|
|
9
9
|
* this bridge can be deleted.
|
|
10
10
|
*/
|
|
11
|
-
import { join } from "node:path";
|
|
12
11
|
import { createAppContext } from "../composition-root.js";
|
|
13
12
|
import { tryLoadConfigFile } from "../pipeline/compiler/config-loader.js";
|
|
14
13
|
/**
|
|
@@ -78,12 +77,10 @@ export function mapToResolvedConfig(opts, rootDir) {
|
|
|
78
77
|
remote: opts.remote ?? false,
|
|
79
78
|
apiUrl: opts.apiUrl ?? "https://ailf-api.sanity.build",
|
|
80
79
|
apiKey: opts.apiKey,
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
captureGcsBucket: process.env.AILF_CAPTURE_GCS_BUCKET,
|
|
86
|
-
captureGcsPrefix: process.env.AILF_CAPTURE_GCS_PREFIX,
|
|
80
|
+
artifactsDisabled: opts.artifactsDisabled,
|
|
81
|
+
artifactsDir: opts.artifactsDir,
|
|
82
|
+
artifactsDryRun: opts.artifactsDryRun,
|
|
83
|
+
artifactsExclude: opts.artifactsExclude,
|
|
87
84
|
artifactGcsBucket: process.env.AILF_GCS_ARTIFACT_BUCKET,
|
|
88
85
|
artifactUpload: parseArtifactUploadEnv(process.env.AILF_ARTIFACT_UPLOAD),
|
|
89
86
|
};
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
* each step completes. This enables the GET /v1/jobs/:jobId polling
|
|
12
12
|
* endpoint to show real-time progress.
|
|
13
13
|
*/
|
|
14
|
-
import { assoc, } from "../_vendor/ailf-core/index.js";
|
|
14
|
+
import { ARTIFACT_EXPORT_PHASE_ID, assoc, } from "../_vendor/ailf-core/index.js";
|
|
15
15
|
import { runStep } from "./step-runner.js";
|
|
16
16
|
// ---------------------------------------------------------------------------
|
|
17
17
|
// Job progress reporter
|
|
@@ -111,21 +111,6 @@ async function capturePipelineContext(ctx, state, results) {
|
|
|
111
111
|
ctx.logger.debug(`pipelineContext emit rejected: ${err instanceof Error ? err.message : String(err)}`);
|
|
112
112
|
}
|
|
113
113
|
}
|
|
114
|
-
/**
|
|
115
|
-
* Flush captured artifacts to disk. Non-blocking — failures are logged
|
|
116
|
-
* but never affect the pipeline result.
|
|
117
|
-
*/
|
|
118
|
-
async function flushArtifacts(ctx) {
|
|
119
|
-
if (!ctx.collector.enabled)
|
|
120
|
-
return;
|
|
121
|
-
try {
|
|
122
|
-
const result = await ctx.collector.flush();
|
|
123
|
-
ctx.logger.info(`Captured ${result.artifactCount} artifacts → ${result.destination}`);
|
|
124
|
-
}
|
|
125
|
-
catch (err) {
|
|
126
|
-
ctx.logger.warn(`Artifact capture flush failed: ${err instanceof Error ? err.message : err}`);
|
|
127
|
-
}
|
|
128
|
-
}
|
|
129
114
|
// ---------------------------------------------------------------------------
|
|
130
115
|
// Orchestrator
|
|
131
116
|
// ---------------------------------------------------------------------------
|
|
@@ -157,10 +142,16 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
157
142
|
if (hasJob) {
|
|
158
143
|
await reportJobProgress(ctx, steps[0]?.name ?? "init", 0, steps.length, "running", undefined, jobUpdates);
|
|
159
144
|
}
|
|
145
|
+
// W0053 — artifact export phase. Opens the first time a non-`run-eval`
|
|
146
|
+
// step starts, signalling the user that promptfoo's progress bar is done
|
|
147
|
+
// and the (previously silent) GCS export/upload window is now active.
|
|
148
|
+
// Closed in a finally after the step loop, regardless of pipeline outcome.
|
|
149
|
+
const exportPhase = createExportPhaseGate(ctx);
|
|
160
150
|
for (let i = 0; i < steps.length; i++) {
|
|
161
151
|
const step = steps[i];
|
|
162
152
|
ctx.logger.debug(`Starting step ${i + 1}/${steps.length}: ${step.name}`);
|
|
163
153
|
ctx.logger.section(step.name);
|
|
154
|
+
exportPhase.maybeOpen(step.name);
|
|
164
155
|
// Report current step progress
|
|
165
156
|
if (hasJob) {
|
|
166
157
|
await reportJobProgress(ctx, step.name, i, steps.length, "running", undefined, jobUpdates);
|
|
@@ -182,13 +173,11 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
182
173
|
step: step.name,
|
|
183
174
|
}, jobUpdates);
|
|
184
175
|
}
|
|
185
|
-
// Capture pipeline context
|
|
176
|
+
// Capture pipeline context before exiting. `job-updates` was an
|
|
177
|
+
// observability-only capture not tied to a registered artifact type;
|
|
178
|
+
// dropped in W0050. Use the JobStore path for job telemetry.
|
|
186
179
|
await capturePipelineContext(ctx, state, results);
|
|
187
|
-
|
|
188
|
-
// to a registered artifact type; dropped here. Use the JobStore
|
|
189
|
-
// path if job telemetry is needed.
|
|
190
|
-
// Flush captured artifacts even on failure (partial capture is useful)
|
|
191
|
-
await flushArtifacts(ctx);
|
|
180
|
+
exportPhase.close();
|
|
192
181
|
return {
|
|
193
182
|
belowCritical: state.belowCritical,
|
|
194
183
|
durationMs: Date.now() - pipelineStart,
|
|
@@ -245,8 +234,7 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
245
234
|
// Capture pipeline context. `job-updates` observability captures were
|
|
246
235
|
// dropped in Slice 6.1 — JobStore is the supported telemetry path.
|
|
247
236
|
await capturePipelineContext(ctx, state, results);
|
|
248
|
-
|
|
249
|
-
await flushArtifacts(ctx);
|
|
237
|
+
exportPhase.close();
|
|
250
238
|
return {
|
|
251
239
|
belowCritical: state.belowCritical,
|
|
252
240
|
durationMs,
|
|
@@ -257,3 +245,91 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
257
245
|
validation,
|
|
258
246
|
};
|
|
259
247
|
}
|
|
248
|
+
// ---------------------------------------------------------------------------
|
|
249
|
+
// Artifact export phase gate (W0053)
|
|
250
|
+
// ---------------------------------------------------------------------------
|
|
251
|
+
/**
 * Build a lazy, one-shot gate around the `artifact-export` progress phase.
 *
 * `maybeOpen(stepName)` starts the phase the first time it is called with a
 * step name that neither begins with `run-eval` nor belongs to the pre-export
 * set below. `close()` completes the phase. Both are idempotent: once the
 * phase has opened, further `maybeOpen` calls do nothing, and `close` only has
 * an effect while the phase is open.
 *
 * The gate keys on step names rather than timestamps so the phase header
 * appears exactly when the step after `run-eval` takes over; `run-eval`'s own
 * progress is rendered by promptfoo, so opening earlier would duplicate it.
 *
 * @param ctx app context; reads `ctx.progress` and, via `describeExportPhase`,
 *   `ctx.artifactWriter`
 * @returns {{ maybeOpen(stepName: string): void, close(): void }}
 */
function createExportPhaseGate(ctx) {
    // Steps that run before the artifact-heavy post-eval section.
    const stepsBeforeExport = new Set([
        "validate",
        "mirror-repo-tasks",
        "fetch-docs",
        "generate-configs",
        "grader-consistency",
    ]);
    const { label, detail } = describeExportPhase(ctx);
    // Lifecycle: "pending" → "open" → "closed". It never moves backwards.
    let lifecycle = "pending";
    let startedAt = 0;
    const maybeOpen = (stepName) => {
        if (lifecycle !== "pending") {
            return;
        }
        const runsBeforeExport = stepName.startsWith("run-eval") || stepsBeforeExport.has(stepName);
        if (runsBeforeExport) {
            return;
        }
        lifecycle = "open";
        startedAt = Date.now();
        ctx.progress.phaseStart({
            phaseId: ARTIFACT_EXPORT_PHASE_ID,
            label,
            detail,
            startedAt,
        });
    };
    const close = () => {
        if (lifecycle !== "open") {
            return;
        }
        lifecycle = "closed";
        // The orchestrator does not track running totals, so the counts are
        // reported as zero here; adapters that print a summary keep their own
        // tally from the phaseProgress events they received.
        ctx.progress.phaseComplete({
            phaseId: ARTIFACT_EXPORT_PHASE_ID,
            itemsCompleted: 0,
            bytesCompleted: 0,
            durationMs: Date.now() - startedAt,
        });
    };
    return { maybeOpen, close };
}
|
|
312
|
+
/**
 * Decorators that forward to an `inner` writer and say nothing about where
 * artifacts end up. The composition root stacks them as
 * `InstrumentedArtifactWriter(AccumulatingArtifactWriter(base))` when upload
 * metrics are enabled, and as `AccumulatingArtifactWriter(base)` otherwise.
 */
const EXPORT_PHASE_PASSTHROUGH_WRITERS = new Set([
    "AccumulatingArtifactWriter",
    "InstrumentedArtifactWriter",
]);
/** Upper bound on decorator hops, so a cyclic `inner` chain cannot spin forever. */
const EXPORT_PHASE_MAX_UNWRAP_DEPTH = 8;
/**
 * Build the user-facing phase label by peeking at the wired writer chain.
 *
 * Resolution starts at `ctx.artifactWriter.inner` (or the writer itself when
 * it has no `inner`) and keeps descending through the pass-through decorators
 * listed above. Unwrapping a single level is not enough: with the metrics
 * decorator on top, one hop lands on `AccumulatingArtifactWriter`, which
 * matches no destination and would mislabel a fanout run as "local".
 *
 * @param ctx app context; only `ctx.artifactWriter` is read
 * @returns {{ label: string, detail?: string }} `detail` names the
 *   destination and is omitted for the no-op writer
 */
function describeExportPhase(ctx) {
    const writer = ctx.artifactWriter;
    let target = writer.inner ?? writer;
    for (let depth = 0; depth < EXPORT_PHASE_MAX_UNWRAP_DEPTH; depth++) {
        const isPassthrough = EXPORT_PHASE_PASSTHROUGH_WRITERS.has(target.constructor.name);
        if (!isPassthrough || !target.inner) {
            break;
        }
        target = target.inner;
    }
    switch (target.constructor.name) {
        case "FanoutArtifactWriter":
            return { label: "Exporting run artifacts", detail: "local + GCS" };
        case "GcsArtifactWriter":
            return { label: "Exporting run artifacts", detail: "GCS" };
        case "ApiGatewayArtifactWriter":
        case "BatchingApiGatewayArtifactWriter":
            return { label: "Exporting run artifacts", detail: "API gateway" };
        case "NoOpArtifactWriter":
            return { label: "Finalizing run" };
        default:
            // Includes the bare LocalFilesystemArtifactWriter case.
            return { label: "Exporting run artifacts", detail: "local" };
    }
}
|
|
@@ -146,7 +146,7 @@ export class CalculateScoresStep {
|
|
|
146
146
|
// W0050 — ctx.artifactWriter is always present; no guard needed.
|
|
147
147
|
const testResults = tryReadTestResults(ctx.config.rootDir);
|
|
148
148
|
if (testResults?.length) {
|
|
149
|
-
const artifactRef = await uploadTestOutputs(ctx.artifactWriter, ctx.runId, testResults);
|
|
149
|
+
const artifactRef = await uploadTestOutputs(ctx.artifactWriter, ctx.runId, testResults, ctx.config.mode);
|
|
150
150
|
if (artifactRef) {
|
|
151
151
|
state.artifactRefs = {
|
|
152
152
|
...state.artifactRefs,
|
|
@@ -18,9 +18,36 @@
|
|
|
18
18
|
import { existsSync, readFileSync } from "node:fs";
|
|
19
19
|
import { resolve } from "node:path";
|
|
20
20
|
import { AccumulatingArtifactWriter } from "../../artifact-capture/accumulating-artifact-writer.js";
|
|
21
|
+
import { InstrumentedArtifactWriter } from "../../artifact-capture/instrumented-artifact-writer.js";
|
|
21
22
|
import { buildRunContext } from "../../pipeline/run-context.js";
|
|
22
23
|
import { loadSource } from "../../sources.js";
|
|
23
24
|
import { configToSourceOverrides } from "../config-to-source-overrides.js";
|
|
25
|
+
/**
 * Maximum number of writers inspected while unwrapping. A safety belt: a
 * decorator whose `inner` pointed back at itself would otherwise loop forever.
 */
const FIND_ACCUMULATOR_MAX_DEPTH = 8;
/**
 * Locate the `AccumulatingArtifactWriter` behind a possibly-decorated writer.
 *
 * The composition root wraps the accumulator in `InstrumentedArtifactWriter`
 * when `AILF_UPLOAD_METRICS=1`, so a plain `instanceof` check on the outer
 * writer would miss it and the manifest would come out empty. Only
 * `InstrumentedArtifactWriter` layers are stepped through (via `.inner`); any
 * other writer type ends the search.
 *
 * @param writer the outermost artifact writer (may be null/undefined)
 * @returns the accumulator instance, or `null` when none is found within
 *   `FIND_ACCUMULATOR_MAX_DEPTH` layers
 */
function findAccumulator(writer) {
    let candidate = writer;
    let hops = 0;
    while (candidate && hops < FIND_ACCUMULATOR_MAX_DEPTH) {
        if (candidate instanceof AccumulatingArtifactWriter) {
            return candidate;
        }
        if (!(candidate instanceof InstrumentedArtifactWriter)) {
            // Unknown writer type: do not guess at its internals.
            return null;
        }
        candidate = candidate.inner;
        hops += 1;
    }
    return null;
}
|
|
24
51
|
export class FinalizeRunStep {
|
|
25
52
|
pipelineStart;
|
|
26
53
|
options;
|
|
@@ -64,8 +91,12 @@ export class FinalizeRunStep {
|
|
|
64
91
|
// happened to register manually. When the writer is a NoOp / plain
|
|
65
92
|
// decorator without accumulation, `aggregated` stays empty and the
|
|
66
93
|
// manifest falls back to the producer-side registration.
|
|
67
|
-
|
|
68
|
-
|
|
94
|
+
//
|
|
95
|
+
// W0058: `findAccumulator` unwraps `InstrumentedArtifactWriter` too
|
|
96
|
+
// so the manifest stays fully populated when `AILF_UPLOAD_METRICS=1`.
|
|
97
|
+
const accumulator = findAccumulator(ctx.artifactWriter);
|
|
98
|
+
const aggregated = accumulator
|
|
99
|
+
? accumulator.getAccumulatedArtifactRefs()
|
|
69
100
|
: {};
|
|
70
101
|
const artifacts = {
|
|
71
102
|
...aggregated,
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
* still live inside the full `rawResults` object.
|
|
28
28
|
*/
|
|
29
29
|
import { readFileSync } from "node:fs";
|
|
30
|
-
import { classifyRubric, parseRubricScore, } from "../_vendor/ailf-core/index.js";
|
|
30
|
+
import { classifyRubric, parseRubricScore, resolveVariantMode, } from "../_vendor/ailf-core/index.js";
|
|
31
31
|
// ---------------------------------------------------------------------------
|
|
32
32
|
// Public entry point
|
|
33
33
|
// ---------------------------------------------------------------------------
|
|
@@ -57,23 +57,40 @@ export async function emitPerEntryEvalResults(writer, ctx, mode, resultsPath) {
|
|
|
57
57
|
console.warn(` ⚠️ emitPerEntryEvalResults: ${resultsPath} has no results[]`);
|
|
58
58
|
return;
|
|
59
59
|
}
|
|
60
|
+
// W0058: fire every emit synchronously and `Promise.all` once at the
|
|
61
|
+
// end. The previous `parallelMap` pattern created producer-side
|
|
62
|
+
// backpressure (each worker was blocked on its own `await emit(...)`),
|
|
63
|
+
// which kept the batching writer's queue shallow (≤ producer
|
|
64
|
+
// concurrency). With queueing delegated entirely to the writer, the
|
|
65
|
+
// batching API-Gateway writer gets a fully-populated pending queue
|
|
66
|
+
// and can pack ~hundreds of entries into a single batch-sign RTT;
|
|
67
|
+
// the GCS-direct writer's own `ConcurrencyLimiter` caps the PUT
|
|
68
|
+
// fan-out so the 1 500-concurrent-PUT scenario cannot happen.
|
|
69
|
+
const emits = [];
|
|
60
70
|
for (const result of rows) {
|
|
61
|
-
const
|
|
71
|
+
const rawTaskId = result.testCase?.description ?? "unknown-task";
|
|
62
72
|
const modelId = result.provider?.id ?? result.provider?.label ?? "unknown-model";
|
|
73
|
+
// D0033 axis convention: literacy-mode task descriptions carry a
|
|
74
|
+
// `(gold)` / `(baseline)` suffix; that variant IS the `mode` axis
|
|
75
|
+
// value. Stripping the suffix here keeps the writer's key aligned
|
|
76
|
+
// with what `slim-report-summary#slimJudgments` and the Studio
|
|
77
|
+
// `testOutputsKeyFor` hook compute on the read side. Without this,
|
|
78
|
+
// signed-URL lookups for grader/judgment artifacts 404.
|
|
79
|
+
const { mode: axisMode, task: axisTask } = resolveVariantMode(rawTaskId, mode);
|
|
63
80
|
const baseAssoc = {
|
|
64
81
|
run: ctx.runId,
|
|
65
|
-
mode,
|
|
66
|
-
task:
|
|
82
|
+
mode: axisMode,
|
|
83
|
+
task: axisTask,
|
|
67
84
|
model: modelId,
|
|
68
85
|
};
|
|
69
86
|
// rawResults — full raw entry (bounded by descriptor capBytes: 1 MB)
|
|
70
|
-
|
|
87
|
+
emits.push(writer.emit("rawResults", baseAssoc, result));
|
|
71
88
|
// renderedPrompts — what the model saw + which provider it went to
|
|
72
89
|
if (result.prompt !== undefined) {
|
|
73
|
-
|
|
90
|
+
emits.push(writer.emit("renderedPrompts", baseAssoc, {
|
|
74
91
|
prompt: result.prompt,
|
|
75
92
|
provider: result.provider,
|
|
76
|
-
});
|
|
93
|
+
}));
|
|
77
94
|
}
|
|
78
95
|
// Per-grader decomposition — only LLM-rubric assertions have a
|
|
79
96
|
// natural grader identity. Code assertions (javascript/contains/…)
|
|
@@ -86,15 +103,16 @@ export async function emitPerEntryEvalResults(writer, ctx, mode, resultsPath) {
|
|
|
86
103
|
if (!dimension)
|
|
87
104
|
continue;
|
|
88
105
|
const graderAssoc = { ...baseAssoc, grader: dimension };
|
|
89
|
-
|
|
106
|
+
emits.push(writer.emit("graderPrompts", graderAssoc, {
|
|
90
107
|
dimension,
|
|
91
108
|
assertion: comp.assertion,
|
|
92
|
-
});
|
|
93
|
-
|
|
109
|
+
}));
|
|
110
|
+
emits.push(writer.emit("graderJudgments", graderAssoc, {
|
|
94
111
|
score: parseRubricScore(comp) ?? 0,
|
|
95
112
|
reason: comp.reason ?? "",
|
|
96
113
|
pass: comp.pass,
|
|
97
|
-
});
|
|
114
|
+
}));
|
|
98
115
|
}
|
|
99
116
|
}
|
|
117
|
+
await Promise.all(emits);
|
|
100
118
|
}
|
|
@@ -74,10 +74,6 @@ export function mapRequestToConfig(request, rootDir) {
|
|
|
74
74
|
callerGit: request.callerGit,
|
|
75
75
|
callback: request.callback,
|
|
76
76
|
jobId: request.jobId,
|
|
77
|
-
captureEnabled: false,
|
|
78
|
-
captureDir: undefined,
|
|
79
|
-
captureCompress: true,
|
|
80
|
-
captureExtras: true,
|
|
81
77
|
remote: false,
|
|
82
78
|
apiUrl: "https://ailf-api.sanity.build",
|
|
83
79
|
presets: request.presets,
|
|
@@ -15,12 +15,19 @@
|
|
|
15
15
|
*
|
|
16
16
|
* @see docs/decisions/D0032-run-anchored-artifact-store.md
|
|
17
17
|
*/
|
|
18
|
-
import type
|
|
18
|
+
import { type ArtifactRef, type ArtifactWriter, type RunId, type StoredTestResult } from "../_vendor/ailf-core/index.d.ts";
|
|
19
19
|
/**
|
|
20
20
|
* Upload testOutputs as per-entry GCS objects under
|
|
21
|
-
* `runs/{runId}/test-outputs/`, one per `
|
|
21
|
+
* `runs/{runId}/test-outputs/`, one per `(mode, task, model)` triple.
|
|
22
22
|
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
23
|
+
* D0033 axis convention: literacy-mode taskIds carry a `(gold)` / `(baseline)`
|
|
24
|
+
* suffix whose value is the `mode` axis on the artifact — stripped via
|
|
25
|
+
* `resolveVariantMode`. This aligns the on-disk key with what the Studio
|
|
26
|
+
* hover-prefetch (`testOutputsKeyFor`) and slim-report readers compute; the
|
|
27
|
+
* legacy 2-segment form produced by the pre-D0033 writer 404'd on the new
|
|
28
|
+
* 3-segment read path.
|
|
29
|
+
*
|
|
30
|
+
* Returns the last non-null `ArtifactRef` (in `testResults` order), or `null` when upload is
|
|
31
|
+
* skipped / every emit fails (P5: non-blocking).
|
|
25
32
|
*/
|
|
26
|
-
export declare function uploadTestOutputs(writer: ArtifactWriter, runId: RunId, testResults: StoredTestResult[]): Promise<ArtifactRef | null>;
|
|
33
|
+
export declare function uploadTestOutputs(writer: ArtifactWriter, runId: RunId, testResults: StoredTestResult[], defaultMode: string): Promise<ArtifactRef | null>;
|
|
@@ -15,20 +15,37 @@
|
|
|
15
15
|
*
|
|
16
16
|
* @see docs/decisions/D0032-run-anchored-artifact-store.md
|
|
17
17
|
*/
|
|
18
|
+
import { resolveVariantMode, } from "../_vendor/ailf-core/index.js";
|
|
18
19
|
/**
|
|
19
20
|
* Upload testOutputs as per-entry GCS objects under
|
|
20
|
-
* `runs/{runId}/test-outputs/`, one per `
|
|
21
|
+
* `runs/{runId}/test-outputs/`, one per `(mode, task, model)` triple.
|
|
21
22
|
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
23
|
+
* D0033 axis convention: literacy-mode taskIds carry a `(gold)` / `(baseline)`
|
|
24
|
+
* suffix whose value is the `mode` axis on the artifact — stripped via
|
|
25
|
+
* `resolveVariantMode`. This aligns the on-disk key with what the Studio
|
|
26
|
+
* hover-prefetch (`testOutputsKeyFor`) and slim-report readers compute; the
|
|
27
|
+
* legacy 2-segment form produced by the pre-D0033 writer 404'd on the new
|
|
28
|
+
* 3-segment read path.
|
|
29
|
+
*
|
|
30
|
+
* Returns the last non-null `ArtifactRef` (in `testResults` order), or `null` when upload is
|
|
31
|
+
* skipped / every emit fails (P5: non-blocking).
|
|
24
32
|
*/
|
|
25
|
-
export async function uploadTestOutputs(writer, runId, testResults) {
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
33
|
+
export async function uploadTestOutputs(writer, runId, testResults, defaultMode) {
|
|
34
|
+
// W0058: fire every emit synchronously and `Promise.all` once — the
|
|
35
|
+
// writer (batching or GCS-direct) owns concurrency bounds. See the
|
|
36
|
+
// equivalent rationale in `emit-eval-results.ts`.
|
|
37
|
+
const emits = testResults.map((tr) => {
|
|
38
|
+
const { mode: axisMode, task: axisTask } = resolveVariantMode(tr.taskId, defaultMode);
|
|
39
|
+
return writer.emit("testOutputs", { run: runId, mode: axisMode, task: axisTask, model: tr.modelId }, {
|
|
29
40
|
responseOutput: tr.responseOutput ?? "",
|
|
30
41
|
responseOutputTruncated: tr.responseOutputTruncated ?? false,
|
|
31
|
-
}
|
|
32
|
-
})
|
|
33
|
-
|
|
42
|
+
});
|
|
43
|
+
});
|
|
44
|
+
const refs = await Promise.all(emits);
|
|
45
|
+
let lastRef = null;
|
|
46
|
+
for (const ref of refs) {
|
|
47
|
+
if (ref)
|
|
48
|
+
lastRef = ref;
|
|
49
|
+
}
|
|
50
|
+
return lastRef;
|
|
34
51
|
}
|