@sanity/ailf 2.8.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/artifact-capture/association.d.ts +35 -0
- package/dist/_vendor/ailf-core/artifact-capture/association.js +28 -0
- package/dist/_vendor/ailf-core/artifact-registry.d.ts +124 -23
- package/dist/_vendor/ailf-core/artifact-registry.js +724 -63
- package/dist/_vendor/ailf-core/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/index.js +2 -1
- package/dist/_vendor/ailf-core/ports/artifact-writer.d.ts +59 -20
- package/dist/_vendor/ailf-core/ports/artifact-writer.js +33 -10
- package/dist/_vendor/ailf-core/ports/context.d.ts +21 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +6 -6
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/services/index.js +1 -0
- package/dist/_vendor/ailf-core/services/slim-report-summary.d.ts +31 -0
- package/dist/_vendor/ailf-core/services/slim-report-summary.js +217 -0
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +33 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +202 -23
- package/dist/artifact-capture/accumulating-artifact-writer.d.ts +50 -0
- package/dist/artifact-capture/accumulating-artifact-writer.js +111 -0
- package/dist/artifact-capture/api-gateway-artifact-writer.d.ts +17 -4
- package/dist/artifact-capture/api-gateway-artifact-writer.js +58 -7
- package/dist/artifact-capture/emit-file.d.ts +28 -0
- package/dist/artifact-capture/emit-file.js +56 -0
- package/dist/artifact-capture/fanout-artifact-writer.d.ts +39 -0
- package/dist/artifact-capture/fanout-artifact-writer.js +76 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +22 -4
- package/dist/artifact-capture/filesystem-collector.js +48 -23
- package/dist/artifact-capture/gcs-artifact-writer.d.ts +40 -3
- package/dist/artifact-capture/gcs-artifact-writer.js +238 -14
- package/dist/artifact-capture/local-fs-artifact-writer.d.ts +71 -0
- package/dist/artifact-capture/local-fs-artifact-writer.js +273 -0
- package/dist/commands/explain-handler.js +4 -0
- package/dist/commands/pipeline-action.d.ts +5 -0
- package/dist/commands/pipeline-action.js +56 -5
- package/dist/commands/pipeline.d.ts +4 -0
- package/dist/commands/pipeline.js +6 -2
- package/dist/commands/publish.js +4 -1
- package/dist/composition-root.d.ts +13 -10
- package/dist/composition-root.js +74 -20
- package/dist/orchestration/pipeline-orchestrator.d.ts +1 -1
- package/dist/orchestration/pipeline-orchestrator.js +41 -30
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -1
- package/dist/orchestration/steps/calculate-scores-step.js +19 -19
- package/dist/orchestration/steps/callback-step.d.ts +1 -1
- package/dist/orchestration/steps/callback-step.js +6 -4
- package/dist/orchestration/steps/compare-step.d.ts +1 -1
- package/dist/orchestration/steps/compare-step.js +4 -2
- package/dist/orchestration/steps/discovery-report-step.d.ts +1 -1
- package/dist/orchestration/steps/discovery-report-step.js +4 -1
- package/dist/orchestration/steps/fetch-docs-step.js +9 -15
- package/dist/orchestration/steps/finalize-run-step.js +21 -7
- package/dist/orchestration/steps/gap-analysis-step.js +34 -6
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -1
- package/dist/orchestration/steps/generate-configs-step.js +11 -11
- package/dist/orchestration/steps/publish-report-step.d.ts +1 -1
- package/dist/orchestration/steps/publish-report-step.js +24 -19
- package/dist/orchestration/steps/readiness-step.d.ts +1 -1
- package/dist/orchestration/steps/readiness-step.js +4 -1
- package/dist/orchestration/steps/report-step.d.ts +1 -1
- package/dist/orchestration/steps/report-step.js +6 -3
- package/dist/orchestration/steps/run-eval-step.js +14 -9
- package/dist/pipeline/compare.d.ts +2 -2
- package/dist/pipeline/emit-eval-results.d.ts +38 -0
- package/dist/pipeline/emit-eval-results.js +100 -0
- package/package.json +1 -1
|
@@ -263,13 +263,40 @@ export function computeResolvedOptions(opts) {
|
|
|
263
263
|
tagOption,
|
|
264
264
|
taskSourceType: resolvedTaskSourceType,
|
|
265
265
|
urlArgs,
|
|
266
|
-
captureEnabled: opts.capture
|
|
267
|
-
captureDir: opts
|
|
266
|
+
captureEnabled: opts.capture,
|
|
267
|
+
captureDir: resolveArtifactsDir(opts),
|
|
268
268
|
captureCompress: opts.captureCompress !== false &&
|
|
269
269
|
process.env.AILF_CAPTURE_COMPRESS !== "0",
|
|
270
270
|
captureExtras: opts.captureExtras !== false && process.env.AILF_CAPTURE_EXTRAS !== "0",
|
|
271
|
+
artifactsDisabled: opts.artifacts === false,
|
|
272
|
+
artifactsDir: resolveArtifactsDir(opts),
|
|
273
|
+
artifactsDryRun: opts.artifactsDryRun,
|
|
274
|
+
artifactsExclude: parseCaptureExcludeList(opts.captureExclude),
|
|
271
275
|
};
|
|
272
276
|
}
|
|
277
|
+
/**
|
|
278
|
+
* Resolve the artifacts / capture output directory from CLI flags and env
|
|
279
|
+
* vars. Precedence (highest first):
|
|
280
|
+
* 1. `--artifacts-dir` flag
|
|
281
|
+
* 2. `--capture-dir` flag (deprecated alias; no warning — silent rewrite)
|
|
282
|
+
* 3. `AILF_ARTIFACTS_DIR` env var
|
|
283
|
+
* 4. `AILF_CAPTURE_DIR` env var (deprecated alias; silent)
|
|
284
|
+
*/
|
|
285
|
+
function resolveArtifactsDir(opts) {
|
|
286
|
+
return (opts.artifactsDir ??
|
|
287
|
+
opts.captureDir ??
|
|
288
|
+
process.env.AILF_ARTIFACTS_DIR ??
|
|
289
|
+
process.env.AILF_CAPTURE_DIR);
|
|
290
|
+
}
|
|
291
|
+
function parseCaptureExcludeList(raw) {
|
|
292
|
+
if (!raw)
|
|
293
|
+
return undefined;
|
|
294
|
+
const list = raw
|
|
295
|
+
.split(",")
|
|
296
|
+
.map((s) => s.trim())
|
|
297
|
+
.filter(Boolean);
|
|
298
|
+
return list.length > 0 ? list : undefined;
|
|
299
|
+
}
|
|
273
300
|
/** Resolve and validate the --task-source flag value. */
|
|
274
301
|
function resolveTaskSourceType(raw) {
|
|
275
302
|
if (!raw || raw === "content-lake")
|
|
@@ -282,6 +309,20 @@ function resolveTaskSourceType(raw) {
|
|
|
282
309
|
// ---------------------------------------------------------------------------
|
|
283
310
|
// Pipeline entry point
|
|
284
311
|
// ---------------------------------------------------------------------------
|
|
312
|
+
/**
|
|
313
|
+
* Module-level flag so the `--capture` deprecation warning fires exactly
|
|
314
|
+
* once per process even when `executePipeline` is invoked multiple times
|
|
315
|
+
* (e.g. tests, long-lived dev loops).
|
|
316
|
+
*/
|
|
317
|
+
let warnedCaptureDeprecation = false;
|
|
318
|
+
function warnCaptureDeprecationIfNeeded(cliOpts) {
|
|
319
|
+
if (!cliOpts.capture)
|
|
320
|
+
return;
|
|
321
|
+
if (warnedCaptureDeprecation)
|
|
322
|
+
return;
|
|
323
|
+
warnedCaptureDeprecation = true;
|
|
324
|
+
console.warn("--capture is deprecated and will be removed in a future release; use --artifacts-dir or --no-artifacts instead");
|
|
325
|
+
}
|
|
285
326
|
/**
|
|
286
327
|
* Execute the evaluation pipeline.
|
|
287
328
|
*
|
|
@@ -291,6 +332,7 @@ function resolveTaskSourceType(raw) {
|
|
|
291
332
|
* 4. Delegate to the PipelineOrchestrator
|
|
292
333
|
*/
|
|
293
334
|
export async function executePipeline(cliOpts) {
|
|
335
|
+
warnCaptureDeprecationIfNeeded(cliOpts);
|
|
294
336
|
// When --config is provided, resolve config from file instead of CLI flags
|
|
295
337
|
if (cliOpts.config) {
|
|
296
338
|
const { existsSync } = await import("fs");
|
|
@@ -317,9 +359,11 @@ export async function executePipeline(cliOpts) {
|
|
|
317
359
|
config.outputDir = resolveOutputDir(cliOpts.outputDir);
|
|
318
360
|
// Capture options — CLI flags and env vars aren't in the config file,
|
|
319
361
|
// so merge them here (same logic as resolveOptions).
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
362
|
+
// AILF_CAPTURE is a no-op post-W0049; only the flag toggles captureEnabled.
|
|
363
|
+
config.captureEnabled = cliOpts.capture;
|
|
364
|
+
const resolvedArtifactsDir = resolveArtifactsDir(cliOpts);
|
|
365
|
+
if (resolvedArtifactsDir) {
|
|
366
|
+
config.captureDir = resolvedArtifactsDir;
|
|
323
367
|
}
|
|
324
368
|
config.captureCompress =
|
|
325
369
|
cliOpts.captureCompress !== false &&
|
|
@@ -328,6 +372,13 @@ export async function executePipeline(cliOpts) {
|
|
|
328
372
|
cliOpts.captureExtras !== false && process.env.AILF_CAPTURE_EXTRAS !== "0";
|
|
329
373
|
config.captureGcsBucket ??= process.env.AILF_CAPTURE_GCS_BUCKET;
|
|
330
374
|
config.captureGcsPrefix ??= process.env.AILF_CAPTURE_GCS_PREFIX;
|
|
375
|
+
config.artifactsDisabled ??= cliOpts.artifacts === false;
|
|
376
|
+
config.artifactsDir ??= resolvedArtifactsDir;
|
|
377
|
+
config.artifactsDryRun ??= cliOpts.artifactsDryRun;
|
|
378
|
+
const excludeList = parseCaptureExcludeList(cliOpts.captureExclude);
|
|
379
|
+
if (excludeList) {
|
|
380
|
+
config.artifactsExclude = excludeList;
|
|
381
|
+
}
|
|
331
382
|
config.artifactGcsBucket ??= process.env.AILF_GCS_ARTIFACT_BUCKET;
|
|
332
383
|
config.artifactUpload ??= parseArtifactUploadEnv(process.env.AILF_ARTIFACT_UPLOAD);
|
|
333
384
|
// Create AppContext directly from the merged config so adapters
|
|
@@ -68,5 +68,9 @@ export interface PipelineCliOptions {
|
|
|
68
68
|
captureDir?: string;
|
|
69
69
|
captureCompress: boolean;
|
|
70
70
|
captureExtras: boolean;
|
|
71
|
+
artifacts: boolean;
|
|
72
|
+
artifactsDir?: string;
|
|
73
|
+
artifactsDryRun: boolean;
|
|
74
|
+
captureExclude?: string;
|
|
71
75
|
}
|
|
72
76
|
export declare function createPipelineCommand(): Command;
|
|
@@ -54,10 +54,14 @@ export function createPipelineCommand() {
|
|
|
54
54
|
.option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
|
|
55
55
|
.option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
|
|
56
56
|
.option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
|
|
57
|
-
.option("--capture", "Enable artifact capture
|
|
58
|
-
.option("--capture-dir <path>", "
|
|
57
|
+
.option("--capture", "[DEPRECATED] Enable legacy artifact capture. Use --artifacts-dir / --no-artifacts instead.", false)
|
|
58
|
+
.option("--capture-dir <path>", "[DEPRECATED] Alias for --artifacts-dir.")
|
|
59
59
|
.option("--no-capture-compress", "Disable tar.gz compression of captures")
|
|
60
60
|
.option("--no-capture-extras", "Exclude mode-specific artifacts from captures")
|
|
61
|
+
.option("--no-artifacts", "Disable all artifact writers (D0033). Overrides --artifacts-dir.")
|
|
62
|
+
.option("--artifacts-dir <path>", "Root directory for local artifact output (D0033; default: .ailf/results/captures/)")
|
|
63
|
+
.option("--artifacts-dry-run", "Run artifact writers in dry-run mode — log intended writes, touch no storage", false)
|
|
64
|
+
.option("--capture-exclude <types>", "Comma-separated artifact types to skip (e.g. traces,graderPrompts)")
|
|
61
65
|
.action(async (opts) => {
|
|
62
66
|
const { executePipeline } = await import("./pipeline-action.js");
|
|
63
67
|
await executePipeline(opts);
|
package/dist/commands/publish.js
CHANGED
|
@@ -27,6 +27,7 @@ import { addOutputDirOption } from "./shared/options.js";
|
|
|
27
27
|
import { getCallerCwd, resolveOutputDir } from "./shared/resolve-output-dir.js";
|
|
28
28
|
import { buildProvenance, } from "../pipeline/provenance.js";
|
|
29
29
|
import { generateReportTitle } from "../pipeline/report-title.js";
|
|
30
|
+
import { buildSlimReportSummary } from "../_vendor/ailf-core/index.js";
|
|
30
31
|
import { generateReportId, } from "../report-store.js";
|
|
31
32
|
import { withRetry } from "../sinks/retry.js";
|
|
32
33
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
@@ -174,13 +175,15 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
|
|
|
174
175
|
}
|
|
175
176
|
const reportId = generateReportId();
|
|
176
177
|
const title = generateReportTitle({ provenance });
|
|
178
|
+
// W0051 Slice 3: slim the summary at publish time.
|
|
179
|
+
const slimSummary = buildSlimReportSummary(summary, provenance.mode);
|
|
177
180
|
const report = {
|
|
178
181
|
comparison: comparison ?? undefined,
|
|
179
182
|
completedAt: now,
|
|
180
183
|
durationMs: 0, // manual publish — no pipeline duration
|
|
181
184
|
id: reportId,
|
|
182
185
|
provenance,
|
|
183
|
-
summary,
|
|
186
|
+
summary: slimSummary,
|
|
184
187
|
tag: opts.tag,
|
|
185
188
|
title,
|
|
186
189
|
};
|
|
@@ -24,21 +24,24 @@ import { type AppContext, type ArtifactWriter, type AssertionRegistration, type
|
|
|
24
24
|
*/
|
|
25
25
|
export declare function createAppContext(config: ResolvedConfig): AppContext;
|
|
26
26
|
/**
|
|
27
|
-
* Selects
|
|
27
|
+
* Selects the `ArtifactWriter` wiring per D0033 M4:
|
|
28
28
|
*
|
|
29
|
-
*
|
|
30
|
-
*
|
|
31
|
-
* 2.
|
|
32
|
-
*
|
|
33
|
-
*
|
|
29
|
+
* 1. `--no-artifacts` (`config.artifactsDisabled === true`, or legacy
|
|
30
|
+
* `config.artifactUpload === false`) → `NoOpArtifactWriter`.
|
|
31
|
+
* 2. Otherwise: always attach `LocalFilesystemArtifactWriter` under
|
|
32
|
+
* `--artifacts-dir` (default `.ailf/results/captures`).
|
|
33
|
+
* 3. When a remote backend is reachable (ADC, GCLOUD_PROJECT, or an
|
|
34
|
+
* AILF API key + URL), layer it via `FanoutArtifactWriter([local, gcs])`.
|
|
35
|
+
* Local is listed first so a local success + remote failure still
|
|
36
|
+
* produces a non-null ref.
|
|
34
37
|
*
|
|
35
|
-
*
|
|
36
|
-
*
|
|
37
|
-
*
|
|
38
|
+
* Always returns a writer — pipeline code can assume `ctx.artifactWriter`
|
|
39
|
+
* is present. Producers post-W0050 drop their `if (ctx.artifactWriter)`
|
|
40
|
+
* guards in Slice 6.
|
|
38
41
|
*
|
|
39
42
|
* Exported for unit-test access; not part of the public package API.
|
|
40
43
|
*/
|
|
41
|
-
export declare function createArtifactWriter(config: ResolvedConfig, logger: Logger): ArtifactWriter
|
|
44
|
+
export declare function createArtifactWriter(config: ResolvedConfig, logger: Logger): ArtifactWriter;
|
|
42
45
|
/**
|
|
43
46
|
* Generic Promptfoo assertion types available to all evaluation modes.
|
|
44
47
|
*
|
package/dist/composition-root.js
CHANGED
|
@@ -16,11 +16,14 @@
|
|
|
16
16
|
* @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
|
|
17
17
|
*/
|
|
18
18
|
import { join } from "node:path";
|
|
19
|
-
import { InMemoryPluginRegistry, NoOpArtifactCollector, generateRunId, } from "./_vendor/ailf-core/index.js";
|
|
19
|
+
import { InMemoryPluginRegistry, NoOpArtifactCollector, NoOpArtifactWriter, generateRunId, isArtifactType, } from "./_vendor/ailf-core/index.js";
|
|
20
|
+
import { AccumulatingArtifactWriter } from "./artifact-capture/accumulating-artifact-writer.js";
|
|
20
21
|
import { ApiGatewayArtifactWriter } from "./artifact-capture/api-gateway-artifact-writer.js";
|
|
22
|
+
import { FanoutArtifactWriter } from "./artifact-capture/fanout-artifact-writer.js";
|
|
21
23
|
import { FilesystemArtifactCollector } from "./artifact-capture/filesystem-collector.js";
|
|
22
24
|
import { GcsArtifactCollector } from "./artifact-capture/gcs-collector.js";
|
|
23
25
|
import { GcsArtifactWriter } from "./artifact-capture/gcs-artifact-writer.js";
|
|
26
|
+
import { LocalFilesystemArtifactWriter } from "./artifact-capture/local-fs-artifact-writer.js";
|
|
24
27
|
import { ContentLakeCacheAdapter } from "./adapters/cache/content-lake-cache.js";
|
|
25
28
|
import { loadExternalPresets } from "./pipeline/compiler/preset-loader.js";
|
|
26
29
|
import { FilesystemCache } from "./adapters/cache/filesystem-cache.js";
|
|
@@ -129,44 +132,95 @@ function createLogger() {
|
|
|
129
132
|
*/
|
|
130
133
|
const DEFAULT_ARTIFACT_BUCKET = "ailf-artifacts";
|
|
131
134
|
/**
|
|
132
|
-
*
|
|
135
|
+
* D0033 M4 default root for local artifacts when `--artifacts-dir` is unset.
|
|
136
|
+
* Mirrors the pre-W0050 capture root so existing dev tooling (Studio
|
|
137
|
+
* retrieval, CI archivers) keeps finding files at the same path prefix.
|
|
138
|
+
*/
|
|
139
|
+
const DEFAULT_LOCAL_ARTIFACTS_DIR = ".ailf/results/captures";
|
|
140
|
+
/**
|
|
141
|
+
* Selects the `ArtifactWriter` wiring per D0033 M4:
|
|
133
142
|
*
|
|
134
|
-
*
|
|
135
|
-
*
|
|
136
|
-
* 2.
|
|
137
|
-
*
|
|
138
|
-
*
|
|
143
|
+
* 1. `--no-artifacts` (`config.artifactsDisabled === true`, or legacy
|
|
144
|
+
* `config.artifactUpload === false`) → `NoOpArtifactWriter`.
|
|
145
|
+
* 2. Otherwise: always attach `LocalFilesystemArtifactWriter` under
|
|
146
|
+
* `--artifacts-dir` (default `.ailf/results/captures`).
|
|
147
|
+
* 3. When a remote backend is reachable (ADC, GCLOUD_PROJECT, or an
|
|
148
|
+
* AILF API key + URL), layer it via `FanoutArtifactWriter([local, gcs])`.
|
|
149
|
+
* Local is listed first so a local success + remote failure still
|
|
150
|
+
* produces a non-null ref.
|
|
139
151
|
*
|
|
140
|
-
*
|
|
141
|
-
*
|
|
142
|
-
*
|
|
152
|
+
* Always returns a writer — pipeline code can assume `ctx.artifactWriter`
|
|
153
|
+
* is present. Producers post-W0050 drop their `if (ctx.artifactWriter)`
|
|
154
|
+
* guards in Slice 6.
|
|
143
155
|
*
|
|
144
156
|
* Exported for unit-test access; not part of the public package API.
|
|
145
157
|
*/
|
|
146
158
|
export function createArtifactWriter(config, logger) {
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
159
|
+
// Legacy `artifactUpload: false` still disables — treat as an alias for
|
|
160
|
+
// the canonical `artifactsDisabled: true` until W0052 removes it.
|
|
161
|
+
if (config.artifactsDisabled === true || config.artifactUpload === false) {
|
|
162
|
+
logger.debug("Artifact writer: NoOpArtifactWriter (--no-artifacts / artifactsDisabled / artifactUpload=false)");
|
|
163
|
+
return new NoOpArtifactWriter();
|
|
164
|
+
}
|
|
165
|
+
const exclude = resolveExcludeList(config.artifactsExclude, logger);
|
|
166
|
+
const rootDir = config.artifactsDir ?? DEFAULT_LOCAL_ARTIFACTS_DIR;
|
|
167
|
+
const local = new LocalFilesystemArtifactWriter({ rootDir, exclude });
|
|
168
|
+
const remote = createRemoteArtifactWriter(config, logger);
|
|
169
|
+
const base = remote
|
|
170
|
+
? new FanoutArtifactWriter([local, remote])
|
|
171
|
+
: local;
|
|
172
|
+
if (!remote) {
|
|
173
|
+
logger.debug(`Artifact writer: LocalFilesystemArtifactWriter only (rootDir=${rootDir})`);
|
|
174
|
+
}
|
|
175
|
+
else {
|
|
176
|
+
logger.debug(`Artifact writer: FanoutArtifactWriter([local=${rootDir}, ${remote.constructor.name}])`);
|
|
150
177
|
}
|
|
178
|
+
// Wrap in the accumulator so FinalizeRunStep can build a populated
|
|
179
|
+
// RunManifest without each producer bookkeeping its own ArtifactRefs
|
|
180
|
+
// (W0051 Slice 3 revisit — Option B of the "manifest empty on real runs"
|
|
181
|
+
// fix).
|
|
182
|
+
return new AccumulatingArtifactWriter(base);
|
|
183
|
+
}
|
|
184
|
+
/**
|
|
185
|
+
* Validate the exclude list against the registry. Unknown types are dropped
|
|
186
|
+
* with a warning — a typo'd CLI flag shouldn't silently match nothing.
|
|
187
|
+
*/
|
|
188
|
+
function resolveExcludeList(raw, logger) {
|
|
189
|
+
if (!raw || raw.length === 0)
|
|
190
|
+
return [];
|
|
191
|
+
const valid = [];
|
|
192
|
+
for (const name of raw) {
|
|
193
|
+
if (isArtifactType(name)) {
|
|
194
|
+
valid.push(name);
|
|
195
|
+
}
|
|
196
|
+
else {
|
|
197
|
+
logger.warn(`--capture-exclude: "${name}" is not a known artifact type — ignored`);
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
return valid;
|
|
201
|
+
}
|
|
202
|
+
/**
|
|
203
|
+
* The optional remote-backend writer layered on top of the local writer.
|
|
204
|
+
* Returns null when no credentials are available — the local writer stays
|
|
205
|
+
* the sole backend for that run, which is the D0033 M4 default for laptops
|
|
206
|
+
* and CI without GCS creds.
|
|
207
|
+
*/
|
|
208
|
+
function createRemoteArtifactWriter(config, logger) {
|
|
151
209
|
const bucket = config.artifactGcsBucket ?? DEFAULT_ARTIFACT_BUCKET;
|
|
152
|
-
// CI / GCP runtime — direct GCS upload (fastest, no extra hop).
|
|
153
|
-
// We treat the presence of either env var as the user opting in to ADC.
|
|
154
210
|
const hasGcsCredentials = Boolean(process.env.GOOGLE_APPLICATION_CREDENTIALS || process.env.GCLOUD_PROJECT);
|
|
155
211
|
if (hasGcsCredentials) {
|
|
156
|
-
logger.debug(`Artifact
|
|
212
|
+
logger.debug(`Artifact remote backend: GcsArtifactWriter (ADC, bucket=${bucket})`);
|
|
157
213
|
return new GcsArtifactWriter({ bucket });
|
|
158
214
|
}
|
|
159
|
-
// Local dev — request signed PUT URLs from the API gateway, no GCS creds needed.
|
|
160
215
|
if (config.apiKey && config.apiUrl) {
|
|
161
|
-
logger.debug(`Artifact
|
|
216
|
+
logger.debug(`Artifact remote backend: ApiGatewayArtifactWriter (via ${config.apiUrl}, bucket=${bucket})`);
|
|
162
217
|
return new ApiGatewayArtifactWriter({
|
|
163
218
|
apiBaseUrl: config.apiUrl,
|
|
164
219
|
apiKey: config.apiKey,
|
|
165
220
|
bucket,
|
|
166
221
|
});
|
|
167
222
|
}
|
|
168
|
-
|
|
169
|
-
return undefined;
|
|
223
|
+
return null;
|
|
170
224
|
}
|
|
171
225
|
function createCache(config) {
|
|
172
226
|
const local = new FilesystemCache(config.rootDir);
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
* each step completes. This enables the GET /v1/jobs/:jobId polling
|
|
12
12
|
* endpoint to show real-time progress.
|
|
13
13
|
*/
|
|
14
|
-
import type
|
|
14
|
+
import { type AppContext, type PipelineResult, type PipelineStep } from "../_vendor/ailf-core/index.d.ts";
|
|
15
15
|
/**
|
|
16
16
|
* Run a sequence of pipeline steps, short-circuiting on required step failure.
|
|
17
17
|
*
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
* each step completes. This enables the GET /v1/jobs/:jobId polling
|
|
12
12
|
* endpoint to show real-time progress.
|
|
13
13
|
*/
|
|
14
|
+
import { assoc, } from "../_vendor/ailf-core/index.js";
|
|
14
15
|
import { runStep } from "./step-runner.js";
|
|
15
16
|
// ---------------------------------------------------------------------------
|
|
16
17
|
// Job progress reporter
|
|
@@ -75,28 +76,40 @@ async function reportJobProgress(ctx, stepName, completedSteps, totalSteps, stat
|
|
|
75
76
|
* Capture a snapshot of the pipeline config, final state, and step results.
|
|
76
77
|
* Strips secrets (API keys, tokens) from the config.
|
|
77
78
|
*/
|
|
78
|
-
function capturePipelineContext(ctx, state, results) {
|
|
79
|
-
if (!ctx.collector.enabled)
|
|
80
|
-
return;
|
|
79
|
+
async function capturePipelineContext(ctx, state, results) {
|
|
81
80
|
const sanitized = Object.fromEntries(Object.entries(ctx.config).filter(([k]) => !/token|secret|key/i.test(k)));
|
|
82
|
-
ctx.collector.capture("pipeline", "pipeline-context",
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
81
|
+
// W0050 — migrated from ctx.collector.capture("pipeline", "pipeline-context", …)
|
|
82
|
+
// to the registry-driven emit() path. The writer handles redaction,
|
|
83
|
+
// --capture-exclude gating, and local+GCS fanout internally.
|
|
84
|
+
//
|
|
85
|
+
// Awaited (not fire-and-forget) so the write is observable by the
|
|
86
|
+
// orchestrator's caller — a fire-and-forget let the emit fall through
|
|
87
|
+
// to runtime teardown in tests with aggressive afterEach cleanup.
|
|
88
|
+
// `emit` is non-blocking internally (P5): failures return null + warn,
|
|
89
|
+
// never throw, so awaiting can't surface a rejected promise either.
|
|
90
|
+
try {
|
|
91
|
+
await ctx.artifactWriter.emit("pipelineContext", assoc(ctx), {
|
|
92
|
+
config: sanitized,
|
|
93
|
+
state: {
|
|
94
|
+
reportId: state.reportId,
|
|
95
|
+
evalFingerprint: state.evalFingerprint,
|
|
96
|
+
belowCritical: state.belowCritical,
|
|
97
|
+
remoteCacheHits: state.remoteCacheHits
|
|
98
|
+
? [...state.remoteCacheHits]
|
|
99
|
+
: undefined,
|
|
100
|
+
releaseAutoScope: state.releaseAutoScope,
|
|
101
|
+
testSummary: state.testSummary,
|
|
102
|
+
},
|
|
103
|
+
steps: Object.entries(results).map(([name, result]) => ({
|
|
104
|
+
name,
|
|
105
|
+
status: result.status,
|
|
106
|
+
durationMs: result.status !== "skipped" ? result.durationMs : undefined,
|
|
107
|
+
})),
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
catch (err) {
|
|
111
|
+
ctx.logger.debug(`pipelineContext emit rejected: ${err instanceof Error ? err.message : String(err)}`);
|
|
112
|
+
}
|
|
100
113
|
}
|
|
101
114
|
/**
|
|
102
115
|
* Flush captured artifacts to disk. Non-blocking — failures are logged
|
|
@@ -170,10 +183,10 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
170
183
|
}, jobUpdates);
|
|
171
184
|
}
|
|
172
185
|
// Capture pipeline context and job updates before flushing
|
|
173
|
-
capturePipelineContext(ctx, state, results);
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
186
|
+
await capturePipelineContext(ctx, state, results);
|
|
187
|
+
// W0050 — `job-updates` was an observability-only capture not tied
|
|
188
|
+
// to a registered artifact type; dropped here. Use the JobStore
|
|
189
|
+
// path if job telemetry is needed.
|
|
177
190
|
// Flush captured artifacts even on failure (partial capture is useful)
|
|
178
191
|
await flushArtifacts(ctx);
|
|
179
192
|
return {
|
|
@@ -229,11 +242,9 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
229
242
|
ctx.logger.warn("Failed to report job completion — continuing");
|
|
230
243
|
}
|
|
231
244
|
}
|
|
232
|
-
// Capture pipeline context
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
ctx.collector.capture("job-store", "job-updates", jobUpdates);
|
|
236
|
-
}
|
|
245
|
+
// Capture pipeline context. `job-updates` observability captures were
|
|
246
|
+
// dropped in Slice 6.1 — JobStore is the supported telemetry path.
|
|
247
|
+
await capturePipelineContext(ctx, state, results);
|
|
237
248
|
// Flush captured artifacts (non-blocking — failures never affect pipeline result)
|
|
238
249
|
await flushArtifacts(ctx);
|
|
239
250
|
return {
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Calls calculateAndWriteScores() from pipeline/calculate-scores.ts with
|
|
5
5
|
* typed options derived from AppContext. No env bridge needed.
|
|
6
6
|
*/
|
|
7
|
-
import type
|
|
7
|
+
import { type AppContext, type PipelineState, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
8
8
|
export declare class CalculateScoresStep implements PipelineStep {
|
|
9
9
|
readonly name = "calculate-scores";
|
|
10
10
|
check(): ValidationIssue[];
|
|
@@ -6,6 +6,8 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { existsSync, readFileSync } from "node:fs";
|
|
8
8
|
import { join, resolve } from "path";
|
|
9
|
+
import { assoc, } from "../../_vendor/ailf-core/index.js";
|
|
10
|
+
import { emitFileContents } from "../../artifact-capture/emit-file.js";
|
|
9
11
|
import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
|
|
10
12
|
import { getStepInputPaths } from "../../pipeline/cache.js";
|
|
11
13
|
import { buildCacheContext } from "../cache-context.js";
|
|
@@ -122,16 +124,15 @@ export class CalculateScoresStep {
|
|
|
122
124
|
state.belowCritical = belowCritical;
|
|
123
125
|
}
|
|
124
126
|
// Capture score artifacts
|
|
127
|
+
// W0050 — score-summary → scoreSummary (run-scoped bulk).
|
|
128
|
+
// grader-judgments.json and test-results.json were aggregated captures
|
|
129
|
+
// without registered descriptors. graderJudgments is now per-entry
|
|
130
|
+
// ({run, mode, task, model, grader}) and lands via run-eval-step in
|
|
131
|
+
// Slice 6.6; the aggregated file is dropped.
|
|
125
132
|
const resultsDir = join(ctx.config.rootDir, "results", "latest");
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
"test-results.json",
|
|
130
|
-
]) {
|
|
131
|
-
const filePath = join(resultsDir, file);
|
|
132
|
-
if (existsSync(filePath)) {
|
|
133
|
-
ctx.collector.captureFile("calculate-scores", file.replace(".json", ""), filePath);
|
|
134
|
-
}
|
|
133
|
+
const summaryPath = join(resultsDir, "score-summary.json");
|
|
134
|
+
if (existsSync(summaryPath)) {
|
|
135
|
+
await emitFileContents(ctx.artifactWriter, "scoreSummary", assoc(ctx), summaryPath);
|
|
135
136
|
}
|
|
136
137
|
// Upload testOutputs to GCS (D0032 — non-blocking, P5).
|
|
137
138
|
// Read from test-results.json rather than score-summary.json: the
|
|
@@ -142,16 +143,15 @@ export class CalculateScoresStep {
|
|
|
142
143
|
// The full responseOutput lives in the GCS artifact; PublishReportStep
|
|
143
144
|
// later strips it from the inline Content Lake document when this
|
|
144
145
|
// upload succeeds.
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
}
|
|
146
|
+
// W0050 — ctx.artifactWriter is always present; no guard needed.
|
|
147
|
+
const testResults = tryReadTestResults(ctx.config.rootDir);
|
|
148
|
+
if (testResults?.length) {
|
|
149
|
+
const artifactRef = await uploadTestOutputs(ctx.artifactWriter, ctx.runId, testResults);
|
|
150
|
+
if (artifactRef) {
|
|
151
|
+
state.artifactRefs = {
|
|
152
|
+
...state.artifactRefs,
|
|
153
|
+
testOutputs: artifactRef,
|
|
154
|
+
};
|
|
155
155
|
}
|
|
156
156
|
}
|
|
157
157
|
const criticalSuffix = belowCritical.length > 0
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
* @see packages/eval/src/pipeline/callback-delivery.ts
|
|
12
12
|
* @see docs/design-docs/api-service-gateway.md
|
|
13
13
|
*/
|
|
14
|
-
import type
|
|
14
|
+
import { type AppContext, type PipelineState, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
15
15
|
import { type CallbackConfig } from "../../pipeline/callback-delivery.js";
|
|
16
16
|
export declare class CallbackStep implements PipelineStep {
|
|
17
17
|
private readonly callback;
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
*/
|
|
14
14
|
import { readFileSync } from "fs";
|
|
15
15
|
import { resolve } from "path";
|
|
16
|
+
import { assoc, } from "../../_vendor/ailf-core/index.js";
|
|
16
17
|
import { deliverCallback, } from "../../pipeline/callback-delivery.js";
|
|
17
18
|
export class CallbackStep {
|
|
18
19
|
callback;
|
|
@@ -58,11 +59,12 @@ export class CallbackStep {
|
|
|
58
59
|
reportId: state.reportId,
|
|
59
60
|
summary,
|
|
60
61
|
};
|
|
61
|
-
//
|
|
62
|
-
|
|
62
|
+
// W0050 — callbackRequest/callbackResponse are per-entry artifacts
|
|
63
|
+
// keyed by the callback target URL (the `name` slot on the association).
|
|
64
|
+
const callbackName = this.callback.url;
|
|
65
|
+
await ctx.artifactWriter.emit("callbackRequest", assoc(ctx, { name: callbackName }), callbackPayload);
|
|
63
66
|
const result = await deliverCallback(this.callback, callbackPayload);
|
|
64
|
-
|
|
65
|
-
ctx.collector.capture("callback", "callback-response", {
|
|
67
|
+
await ctx.artifactWriter.emit("callbackResponse", assoc(ctx, { name: callbackName }), {
|
|
66
68
|
ok: result.ok,
|
|
67
69
|
attempts: result.attempts,
|
|
68
70
|
error: result.error,
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* inlined directly from the former pipeline/steps/compare-step.ts.
|
|
6
6
|
* This is an optional step — failure doesn't stop the pipeline.
|
|
7
7
|
*/
|
|
8
|
-
import type
|
|
8
|
+
import { type AppContext, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
9
9
|
export declare class CompareStep implements PipelineStep {
|
|
10
10
|
readonly name = "compare";
|
|
11
11
|
readonly optional = true;
|
|
@@ -7,6 +7,8 @@
|
|
|
7
7
|
*/
|
|
8
8
|
import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync, } from "fs";
|
|
9
9
|
import { join, resolve } from "path";
|
|
10
|
+
import { assoc, } from "../../_vendor/ailf-core/index.js";
|
|
11
|
+
import { emitFileContents } from "../../artifact-capture/emit-file.js";
|
|
10
12
|
import { compare } from "../../pipeline/compare.js";
|
|
11
13
|
export class CompareStep {
|
|
12
14
|
name = "compare";
|
|
@@ -69,8 +71,8 @@ export class CompareStep {
|
|
|
69
71
|
mkdirSync(ctx.config.outputDir, { recursive: true });
|
|
70
72
|
const reportPath = resolve(ctx.config.outputDir, "comparison-report.json");
|
|
71
73
|
writeFileSync(reportPath, JSON.stringify(report, null, 2));
|
|
72
|
-
//
|
|
73
|
-
ctx.
|
|
74
|
+
// W0050 — comparisonReport is per-entry keyed by mode ({run, mode}).
|
|
75
|
+
await emitFileContents(ctx.artifactWriter, "comparisonReport", assoc(ctx, { mode: ctx.config.mode }), reportPath);
|
|
74
76
|
// Build summary
|
|
75
77
|
const improved = report.improved.length;
|
|
76
78
|
const regressed = report.regressed.length;
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Calls pure functions from pipeline/discovery-report.ts directly.
|
|
5
5
|
* Optional step — failure doesn't stop the pipeline.
|
|
6
6
|
*/
|
|
7
|
-
import type
|
|
7
|
+
import { type AppContext, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
8
8
|
export declare class DiscoveryReportStep implements PipelineStep {
|
|
9
9
|
readonly name = "discovery-report";
|
|
10
10
|
readonly optional = true;
|
|
@@ -6,6 +6,8 @@
|
|
|
6
6
|
*/
|
|
7
7
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
8
8
|
import { resolve } from "path";
|
|
9
|
+
import { assoc, } from "../../_vendor/ailf-core/index.js";
|
|
10
|
+
import { emitFileContents } from "../../artifact-capture/emit-file.js";
|
|
9
11
|
import { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../../pipeline/discovery-report.js";
|
|
10
12
|
export class DiscoveryReportStep {
|
|
11
13
|
name = "discovery-report";
|
|
@@ -38,7 +40,8 @@ export class DiscoveryReportStep {
|
|
|
38
40
|
mkdirSync(ctx.config.outputDir, { recursive: true });
|
|
39
41
|
const discoveryPath = resolve(ctx.config.outputDir, "discovery-report.md");
|
|
40
42
|
writeFileSync(discoveryPath, md);
|
|
41
|
-
|
|
43
|
+
// W0050 — discoveryReport is per-entry keyed by mode.
|
|
44
|
+
await emitFileContents(ctx.artifactWriter, "discoveryReport", assoc(ctx, { mode: ctx.config.mode }), discoveryPath);
|
|
42
45
|
console.log(md);
|
|
43
46
|
const invisible = report.invisibleDocs.length;
|
|
44
47
|
const f1 = report.overall.avgF1.toFixed(2);
|