@sanity/ailf 2.8.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/dist/_vendor/ailf-core/artifact-capture/association.d.ts +35 -0
  2. package/dist/_vendor/ailf-core/artifact-capture/association.js +28 -0
  3. package/dist/_vendor/ailf-core/artifact-registry.d.ts +124 -23
  4. package/dist/_vendor/ailf-core/artifact-registry.js +724 -63
  5. package/dist/_vendor/ailf-core/index.d.ts +2 -1
  6. package/dist/_vendor/ailf-core/index.js +2 -1
  7. package/dist/_vendor/ailf-core/ports/artifact-writer.d.ts +59 -20
  8. package/dist/_vendor/ailf-core/ports/artifact-writer.js +33 -10
  9. package/dist/_vendor/ailf-core/ports/context.d.ts +21 -2
  10. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +6 -6
  11. package/dist/_vendor/ailf-core/services/index.d.ts +1 -0
  12. package/dist/_vendor/ailf-core/services/index.js +1 -0
  13. package/dist/_vendor/ailf-core/services/slim-report-summary.d.ts +31 -0
  14. package/dist/_vendor/ailf-core/services/slim-report-summary.js +217 -0
  15. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +33 -0
  16. package/dist/_vendor/ailf-core/types/index.d.ts +202 -23
  17. package/dist/artifact-capture/accumulating-artifact-writer.d.ts +50 -0
  18. package/dist/artifact-capture/accumulating-artifact-writer.js +111 -0
  19. package/dist/artifact-capture/api-gateway-artifact-writer.d.ts +17 -4
  20. package/dist/artifact-capture/api-gateway-artifact-writer.js +58 -7
  21. package/dist/artifact-capture/emit-file.d.ts +28 -0
  22. package/dist/artifact-capture/emit-file.js +56 -0
  23. package/dist/artifact-capture/fanout-artifact-writer.d.ts +39 -0
  24. package/dist/artifact-capture/fanout-artifact-writer.js +76 -0
  25. package/dist/artifact-capture/filesystem-collector.d.ts +22 -4
  26. package/dist/artifact-capture/filesystem-collector.js +48 -23
  27. package/dist/artifact-capture/gcs-artifact-writer.d.ts +40 -3
  28. package/dist/artifact-capture/gcs-artifact-writer.js +238 -14
  29. package/dist/artifact-capture/local-fs-artifact-writer.d.ts +71 -0
  30. package/dist/artifact-capture/local-fs-artifact-writer.js +273 -0
  31. package/dist/commands/explain-handler.js +4 -0
  32. package/dist/commands/pipeline-action.d.ts +5 -0
  33. package/dist/commands/pipeline-action.js +56 -5
  34. package/dist/commands/pipeline.d.ts +4 -0
  35. package/dist/commands/pipeline.js +6 -2
  36. package/dist/commands/publish.js +4 -1
  37. package/dist/composition-root.d.ts +13 -10
  38. package/dist/composition-root.js +74 -20
  39. package/dist/orchestration/pipeline-orchestrator.d.ts +1 -1
  40. package/dist/orchestration/pipeline-orchestrator.js +41 -30
  41. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -1
  42. package/dist/orchestration/steps/calculate-scores-step.js +19 -19
  43. package/dist/orchestration/steps/callback-step.d.ts +1 -1
  44. package/dist/orchestration/steps/callback-step.js +6 -4
  45. package/dist/orchestration/steps/compare-step.d.ts +1 -1
  46. package/dist/orchestration/steps/compare-step.js +4 -2
  47. package/dist/orchestration/steps/discovery-report-step.d.ts +1 -1
  48. package/dist/orchestration/steps/discovery-report-step.js +4 -1
  49. package/dist/orchestration/steps/fetch-docs-step.js +9 -15
  50. package/dist/orchestration/steps/finalize-run-step.js +21 -7
  51. package/dist/orchestration/steps/gap-analysis-step.js +34 -6
  52. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -1
  53. package/dist/orchestration/steps/generate-configs-step.js +11 -11
  54. package/dist/orchestration/steps/publish-report-step.d.ts +1 -1
  55. package/dist/orchestration/steps/publish-report-step.js +24 -19
  56. package/dist/orchestration/steps/readiness-step.d.ts +1 -1
  57. package/dist/orchestration/steps/readiness-step.js +4 -1
  58. package/dist/orchestration/steps/report-step.d.ts +1 -1
  59. package/dist/orchestration/steps/report-step.js +6 -3
  60. package/dist/orchestration/steps/run-eval-step.js +14 -9
  61. package/dist/pipeline/compare.d.ts +2 -2
  62. package/dist/pipeline/emit-eval-results.d.ts +38 -0
  63. package/dist/pipeline/emit-eval-results.js +100 -0
  64. package/package.json +1 -1
@@ -263,13 +263,40 @@ export function computeResolvedOptions(opts) {
263
263
  tagOption,
264
264
  taskSourceType: resolvedTaskSourceType,
265
265
  urlArgs,
266
- captureEnabled: opts.capture || process.env.AILF_CAPTURE === "1",
267
- captureDir: opts.captureDir ?? process.env.AILF_CAPTURE_DIR,
266
+ captureEnabled: opts.capture,
267
+ captureDir: resolveArtifactsDir(opts),
268
268
  captureCompress: opts.captureCompress !== false &&
269
269
  process.env.AILF_CAPTURE_COMPRESS !== "0",
270
270
  captureExtras: opts.captureExtras !== false && process.env.AILF_CAPTURE_EXTRAS !== "0",
271
+ artifactsDisabled: opts.artifacts === false,
272
+ artifactsDir: resolveArtifactsDir(opts),
273
+ artifactsDryRun: opts.artifactsDryRun,
274
+ artifactsExclude: parseCaptureExcludeList(opts.captureExclude),
271
275
  };
272
276
  }
277
+ /**
278
+ * Resolve the artifacts / capture output directory from CLI flags and env
279
+ * vars. Precedence (highest first):
280
+ * 1. `--artifacts-dir` flag
281
+ * 2. `--capture-dir` flag (deprecated alias; no warning — silent rewrite)
282
+ * 3. `AILF_ARTIFACTS_DIR` env var
283
+ * 4. `AILF_CAPTURE_DIR` env var (deprecated alias; silent)
284
+ */
285
+ function resolveArtifactsDir(opts) {
286
+ return (opts.artifactsDir ??
287
+ opts.captureDir ??
288
+ process.env.AILF_ARTIFACTS_DIR ??
289
+ process.env.AILF_CAPTURE_DIR);
290
+ }
291
+ function parseCaptureExcludeList(raw) {
292
+ if (!raw)
293
+ return undefined;
294
+ const list = raw
295
+ .split(",")
296
+ .map((s) => s.trim())
297
+ .filter(Boolean);
298
+ return list.length > 0 ? list : undefined;
299
+ }
273
300
  /** Resolve and validate the --task-source flag value. */
274
301
  function resolveTaskSourceType(raw) {
275
302
  if (!raw || raw === "content-lake")
@@ -282,6 +309,20 @@ function resolveTaskSourceType(raw) {
282
309
  // ---------------------------------------------------------------------------
283
310
  // Pipeline entry point
284
311
  // ---------------------------------------------------------------------------
312
+ /**
313
+ * Module-level flag so the `--capture` deprecation warning fires exactly
314
+ * once per process even when `executePipeline` is invoked multiple times
315
+ * (e.g. tests, long-lived dev loops).
316
+ */
317
+ let warnedCaptureDeprecation = false;
318
+ function warnCaptureDeprecationIfNeeded(cliOpts) {
319
+ if (!cliOpts.capture)
320
+ return;
321
+ if (warnedCaptureDeprecation)
322
+ return;
323
+ warnedCaptureDeprecation = true;
324
+ console.warn("--capture is deprecated and will be removed in a future release; use --artifacts-dir or --no-artifacts instead");
325
+ }
285
326
  /**
286
327
  * Execute the evaluation pipeline.
287
328
  *
@@ -291,6 +332,7 @@ function resolveTaskSourceType(raw) {
291
332
  * 4. Delegate to the PipelineOrchestrator
292
333
  */
293
334
  export async function executePipeline(cliOpts) {
335
+ warnCaptureDeprecationIfNeeded(cliOpts);
294
336
  // When --config is provided, resolve config from file instead of CLI flags
295
337
  if (cliOpts.config) {
296
338
  const { existsSync } = await import("fs");
@@ -317,9 +359,11 @@ export async function executePipeline(cliOpts) {
317
359
  config.outputDir = resolveOutputDir(cliOpts.outputDir);
318
360
  // Capture options — CLI flags and env vars aren't in the config file,
319
361
  // so merge them here (same logic as resolveOptions).
320
- config.captureEnabled = cliOpts.capture || process.env.AILF_CAPTURE === "1";
321
- if (cliOpts.captureDir ?? process.env.AILF_CAPTURE_DIR) {
322
- config.captureDir = cliOpts.captureDir ?? process.env.AILF_CAPTURE_DIR;
362
+ // AILF_CAPTURE is a no-op post-W0049; only the flag toggles captureEnabled.
363
+ config.captureEnabled = cliOpts.capture;
364
+ const resolvedArtifactsDir = resolveArtifactsDir(cliOpts);
365
+ if (resolvedArtifactsDir) {
366
+ config.captureDir = resolvedArtifactsDir;
323
367
  }
324
368
  config.captureCompress =
325
369
  cliOpts.captureCompress !== false &&
@@ -328,6 +372,13 @@ export async function executePipeline(cliOpts) {
328
372
  cliOpts.captureExtras !== false && process.env.AILF_CAPTURE_EXTRAS !== "0";
329
373
  config.captureGcsBucket ??= process.env.AILF_CAPTURE_GCS_BUCKET;
330
374
  config.captureGcsPrefix ??= process.env.AILF_CAPTURE_GCS_PREFIX;
375
+ config.artifactsDisabled ??= cliOpts.artifacts === false;
376
+ config.artifactsDir ??= resolvedArtifactsDir;
377
+ config.artifactsDryRun ??= cliOpts.artifactsDryRun;
378
+ const excludeList = parseCaptureExcludeList(cliOpts.captureExclude);
379
+ if (excludeList) {
380
+ config.artifactsExclude = excludeList;
381
+ }
331
382
  config.artifactGcsBucket ??= process.env.AILF_GCS_ARTIFACT_BUCKET;
332
383
  config.artifactUpload ??= parseArtifactUploadEnv(process.env.AILF_ARTIFACT_UPLOAD);
333
384
  // Create AppContext directly from the merged config so adapters
@@ -68,5 +68,9 @@ export interface PipelineCliOptions {
68
68
  captureDir?: string;
69
69
  captureCompress: boolean;
70
70
  captureExtras: boolean;
71
+ artifacts: boolean;
72
+ artifactsDir?: string;
73
+ artifactsDryRun: boolean;
74
+ captureExclude?: string;
71
75
  }
72
76
  export declare function createPipelineCommand(): Command;
@@ -54,10 +54,14 @@ export function createPipelineCommand() {
54
54
  .option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
55
55
  .option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
56
56
  .option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
57
- .option("--capture", "Enable artifact capture for this run", false)
58
- .option("--capture-dir <path>", "Base directory for capture output (default: .ailf/results/captures/)")
57
+ .option("--capture", "[DEPRECATED] Enable legacy artifact capture. Use --artifacts-dir / --no-artifacts instead.", false)
58
+ .option("--capture-dir <path>", "[DEPRECATED] Alias for --artifacts-dir.")
59
59
  .option("--no-capture-compress", "Disable tar.gz compression of captures")
60
60
  .option("--no-capture-extras", "Exclude mode-specific artifacts from captures")
61
+ .option("--no-artifacts", "Disable all artifact writers (D0033). Overrides --artifacts-dir.")
62
+ .option("--artifacts-dir <path>", "Root directory for local artifact output (D0033; default: .ailf/results/captures/)")
63
+ .option("--artifacts-dry-run", "Run artifact writers in dry-run mode — log intended writes, touch no storage", false)
64
+ .option("--capture-exclude <types>", "Comma-separated artifact types to skip (e.g. traces,graderPrompts)")
61
65
  .action(async (opts) => {
62
66
  const { executePipeline } = await import("./pipeline-action.js");
63
67
  await executePipeline(opts);
@@ -27,6 +27,7 @@ import { addOutputDirOption } from "./shared/options.js";
27
27
  import { getCallerCwd, resolveOutputDir } from "./shared/resolve-output-dir.js";
28
28
  import { buildProvenance, } from "../pipeline/provenance.js";
29
29
  import { generateReportTitle } from "../pipeline/report-title.js";
30
+ import { buildSlimReportSummary } from "../_vendor/ailf-core/index.js";
30
31
  import { generateReportId, } from "../report-store.js";
31
32
  import { withRetry } from "../sinks/retry.js";
32
33
  const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -174,13 +175,15 @@ async function runPublishCommand(summaryPath, outputDir, opts) {
174
175
  }
175
176
  const reportId = generateReportId();
176
177
  const title = generateReportTitle({ provenance });
178
+ // W0051 Slice 3: slim the summary at publish time.
179
+ const slimSummary = buildSlimReportSummary(summary, provenance.mode);
177
180
  const report = {
178
181
  comparison: comparison ?? undefined,
179
182
  completedAt: now,
180
183
  durationMs: 0, // manual publish — no pipeline duration
181
184
  id: reportId,
182
185
  provenance,
183
- summary,
186
+ summary: slimSummary,
184
187
  tag: opts.tag,
185
188
  title,
186
189
  };
@@ -24,21 +24,24 @@ import { type AppContext, type ArtifactWriter, type AssertionRegistration, type
24
24
  */
25
25
  export declare function createAppContext(config: ResolvedConfig): AppContext;
26
26
  /**
27
- * Selects an ArtifactWriter implementation based on available credentials.
27
+ * Selects the `ArtifactWriter` wiring per D0033 M4:
28
28
  *
29
- * Selection order:
30
- * 1. config.artifactUpload === false → always skip (explicit opt-out)
31
- * 2. GOOGLE_APPLICATION_CREDENTIALS or GCLOUD_PROJECT present → direct GCS
32
- * 3. apiKey + apiUrl present → gateway-signed PUT URL
33
- * 4. Neither → skip silently (P5)
29
+ * 1. `--no-artifacts` (`config.artifactsDisabled === true`, or legacy
30
+ * `config.artifactUpload === false`) → `NoOpArtifactWriter`.
31
+ * 2. Otherwise: always attach `LocalFilesystemArtifactWriter` under
32
+ * `--artifacts-dir` (default `.ailf/results/captures`).
33
+ * 3. When a remote backend is reachable (ADC, GCLOUD_PROJECT, or an
34
+ * AILF API key + URL), layer it via `FanoutArtifactWriter([local, gcs])`.
35
+ * Local is listed first so a local success + remote failure still
36
+ * produces a non-null ref.
34
37
  *
35
- * The bucket defaults to DEFAULT_ARTIFACT_BUCKET when not explicitly set —
36
- * users only need to override for self-hosted deployments with a different
37
- * bucket (and matching gateway signing credentials).
38
+ * Always returns a writer — pipeline code can assume `ctx.artifactWriter`
39
+ * is present. Producers post-W0050 drop their `if (ctx.artifactWriter)`
40
+ * guards in Slice 6.
38
41
  *
39
42
  * Exported for unit-test access; not part of the public package API.
40
43
  */
41
- export declare function createArtifactWriter(config: ResolvedConfig, logger: Logger): ArtifactWriter | undefined;
44
+ export declare function createArtifactWriter(config: ResolvedConfig, logger: Logger): ArtifactWriter;
42
45
  /**
43
46
  * Generic Promptfoo assertion types available to all evaluation modes.
44
47
  *
@@ -16,11 +16,14 @@
16
16
  * @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
17
17
  */
18
18
  import { join } from "node:path";
19
- import { InMemoryPluginRegistry, NoOpArtifactCollector, generateRunId, } from "./_vendor/ailf-core/index.js";
19
+ import { InMemoryPluginRegistry, NoOpArtifactCollector, NoOpArtifactWriter, generateRunId, isArtifactType, } from "./_vendor/ailf-core/index.js";
20
+ import { AccumulatingArtifactWriter } from "./artifact-capture/accumulating-artifact-writer.js";
20
21
  import { ApiGatewayArtifactWriter } from "./artifact-capture/api-gateway-artifact-writer.js";
22
+ import { FanoutArtifactWriter } from "./artifact-capture/fanout-artifact-writer.js";
21
23
  import { FilesystemArtifactCollector } from "./artifact-capture/filesystem-collector.js";
22
24
  import { GcsArtifactCollector } from "./artifact-capture/gcs-collector.js";
23
25
  import { GcsArtifactWriter } from "./artifact-capture/gcs-artifact-writer.js";
26
+ import { LocalFilesystemArtifactWriter } from "./artifact-capture/local-fs-artifact-writer.js";
24
27
  import { ContentLakeCacheAdapter } from "./adapters/cache/content-lake-cache.js";
25
28
  import { loadExternalPresets } from "./pipeline/compiler/preset-loader.js";
26
29
  import { FilesystemCache } from "./adapters/cache/filesystem-cache.js";
@@ -129,44 +132,95 @@ function createLogger() {
129
132
  */
130
133
  const DEFAULT_ARTIFACT_BUCKET = "ailf-artifacts";
131
134
  /**
132
- * Selects an ArtifactWriter implementation based on available credentials.
135
+ * D0033 M4 default root for local artifacts when `--artifacts-dir` is unset.
136
+ * Mirrors the pre-W0050 capture root so existing dev tooling (Studio
137
+ * retrieval, CI archivers) keeps finding files at the same path prefix.
138
+ */
139
+ const DEFAULT_LOCAL_ARTIFACTS_DIR = ".ailf/results/captures";
140
+ /**
141
+ * Selects the `ArtifactWriter` wiring per D0033 M4:
133
142
  *
134
- * Selection order:
135
- * 1. config.artifactUpload === false → always skip (explicit opt-out)
136
- * 2. GOOGLE_APPLICATION_CREDENTIALS or GCLOUD_PROJECT present → direct GCS
137
- * 3. apiKey + apiUrl present → gateway-signed PUT URL
138
- * 4. Neither → skip silently (P5)
143
+ * 1. `--no-artifacts` (`config.artifactsDisabled === true`, or legacy
144
+ * `config.artifactUpload === false`) → `NoOpArtifactWriter`.
145
+ * 2. Otherwise: always attach `LocalFilesystemArtifactWriter` under
146
+ * `--artifacts-dir` (default `.ailf/results/captures`).
147
+ * 3. When a remote backend is reachable (ADC, GCLOUD_PROJECT, or an
148
+ * AILF API key + URL), layer it via `FanoutArtifactWriter([local, gcs])`.
149
+ * Local is listed first so a local success + remote failure still
150
+ * produces a non-null ref.
139
151
  *
140
- * The bucket defaults to DEFAULT_ARTIFACT_BUCKET when not explicitly set —
141
- * users only need to override for self-hosted deployments with a different
142
- * bucket (and matching gateway signing credentials).
152
+ * Always returns a writer — pipeline code can assume `ctx.artifactWriter`
153
+ * is present. Producers post-W0050 drop their `if (ctx.artifactWriter)`
154
+ * guards in Slice 6.
143
155
  *
144
156
  * Exported for unit-test access; not part of the public package API.
145
157
  */
146
158
  export function createArtifactWriter(config, logger) {
147
- if (config.artifactUpload === false) {
148
- logger.debug("Artifact upload explicitly disabled via artifactUpload=false");
149
- return undefined;
159
+ // Legacy `artifactUpload: false` still disables — treat as an alias for
160
+ // the canonical `artifactsDisabled: true` until W0052 removes it.
161
+ if (config.artifactsDisabled === true || config.artifactUpload === false) {
162
+ logger.debug("Artifact writer: NoOpArtifactWriter (--no-artifacts / artifactsDisabled / artifactUpload=false)");
163
+ return new NoOpArtifactWriter();
164
+ }
165
+ const exclude = resolveExcludeList(config.artifactsExclude, logger);
166
+ const rootDir = config.artifactsDir ?? DEFAULT_LOCAL_ARTIFACTS_DIR;
167
+ const local = new LocalFilesystemArtifactWriter({ rootDir, exclude });
168
+ const remote = createRemoteArtifactWriter(config, logger);
169
+ const base = remote
170
+ ? new FanoutArtifactWriter([local, remote])
171
+ : local;
172
+ if (!remote) {
173
+ logger.debug(`Artifact writer: LocalFilesystemArtifactWriter only (rootDir=${rootDir})`);
174
+ }
175
+ else {
176
+ logger.debug(`Artifact writer: FanoutArtifactWriter([local=${rootDir}, ${remote.constructor.name}])`);
150
177
  }
178
+ // Wrap in the accumulator so FinalizeRunStep can build a populated
179
+ // RunManifest without each producer bookkeeping its own ArtifactRefs
180
+ // (W0051 Slice 3 revisit — Option B of the "manifest empty on real runs"
181
+ // fix).
182
+ return new AccumulatingArtifactWriter(base);
183
+ }
184
+ /**
185
+ * Validate the exclude list against the registry. Unknown types are dropped
186
+ * with a warning — a typo'd CLI flag shouldn't silently match nothing.
187
+ */
188
+ function resolveExcludeList(raw, logger) {
189
+ if (!raw || raw.length === 0)
190
+ return [];
191
+ const valid = [];
192
+ for (const name of raw) {
193
+ if (isArtifactType(name)) {
194
+ valid.push(name);
195
+ }
196
+ else {
197
+ logger.warn(`--capture-exclude: "${name}" is not a known artifact type — ignored`);
198
+ }
199
+ }
200
+ return valid;
201
+ }
202
+ /**
203
+ * The optional remote-backend writer layered on top of the local writer.
204
+ * Returns null when no credentials are available — the local writer stays
205
+ * the sole backend for that run, which is the D0033 M4 default for laptops
206
+ * and CI without GCS creds.
207
+ */
208
+ function createRemoteArtifactWriter(config, logger) {
151
209
  const bucket = config.artifactGcsBucket ?? DEFAULT_ARTIFACT_BUCKET;
152
- // CI / GCP runtime — direct GCS upload (fastest, no extra hop).
153
- // We treat the presence of either env var as the user opting in to ADC.
154
210
  const hasGcsCredentials = Boolean(process.env.GOOGLE_APPLICATION_CREDENTIALS || process.env.GCLOUD_PROJECT);
155
211
  if (hasGcsCredentials) {
156
- logger.debug(`Artifact writer: GcsArtifactWriter (direct GCS via ADC, bucket=${bucket})`);
212
+ logger.debug(`Artifact remote backend: GcsArtifactWriter (ADC, bucket=${bucket})`);
157
213
  return new GcsArtifactWriter({ bucket });
158
214
  }
159
- // Local dev — request signed PUT URLs from the API gateway, no GCS creds needed.
160
215
  if (config.apiKey && config.apiUrl) {
161
- logger.debug(`Artifact writer: ApiGatewayArtifactWriter (signed URL via ${config.apiUrl}, bucket=${bucket})`);
216
+ logger.debug(`Artifact remote backend: ApiGatewayArtifactWriter (via ${config.apiUrl}, bucket=${bucket})`);
162
217
  return new ApiGatewayArtifactWriter({
163
218
  apiBaseUrl: config.apiUrl,
164
219
  apiKey: config.apiKey,
165
220
  bucket,
166
221
  });
167
222
  }
168
- logger.debug("Artifact upload skipped: no GCS credentials or AILF_API_KEY available");
169
- return undefined;
223
+ return null;
170
224
  }
171
225
  function createCache(config) {
172
226
  const local = new FilesystemCache(config.rootDir);
@@ -11,7 +11,7 @@
11
11
  * each step completes. This enables the GET /v1/jobs/:jobId polling
12
12
  * endpoint to show real-time progress.
13
13
  */
14
- import type { AppContext, PipelineResult, PipelineStep } from "../_vendor/ailf-core/index.d.ts";
14
+ import { type AppContext, type PipelineResult, type PipelineStep } from "../_vendor/ailf-core/index.d.ts";
15
15
  /**
16
16
  * Run a sequence of pipeline steps, short-circuiting on required step failure.
17
17
  *
@@ -11,6 +11,7 @@
11
11
  * each step completes. This enables the GET /v1/jobs/:jobId polling
12
12
  * endpoint to show real-time progress.
13
13
  */
14
+ import { assoc, } from "../_vendor/ailf-core/index.js";
14
15
  import { runStep } from "./step-runner.js";
15
16
  // ---------------------------------------------------------------------------
16
17
  // Job progress reporter
@@ -75,28 +76,40 @@ async function reportJobProgress(ctx, stepName, completedSteps, totalSteps, stat
75
76
  * Capture a snapshot of the pipeline config, final state, and step results.
76
77
  * Strips secrets (API keys, tokens) from the config.
77
78
  */
78
- function capturePipelineContext(ctx, state, results) {
79
- if (!ctx.collector.enabled)
80
- return;
79
+ async function capturePipelineContext(ctx, state, results) {
81
80
  const sanitized = Object.fromEntries(Object.entries(ctx.config).filter(([k]) => !/token|secret|key/i.test(k)));
82
- ctx.collector.capture("pipeline", "pipeline-context", {
83
- config: sanitized,
84
- state: {
85
- reportId: state.reportId,
86
- evalFingerprint: state.evalFingerprint,
87
- belowCritical: state.belowCritical,
88
- remoteCacheHits: state.remoteCacheHits
89
- ? [...state.remoteCacheHits]
90
- : undefined,
91
- releaseAutoScope: state.releaseAutoScope,
92
- testSummary: state.testSummary,
93
- },
94
- steps: Object.entries(results).map(([name, result]) => ({
95
- name,
96
- status: result.status,
97
- durationMs: result.status !== "skipped" ? result.durationMs : undefined,
98
- })),
99
- });
81
+ // W0050 — migrated from ctx.collector.capture("pipeline", "pipeline-context", …)
82
+ // to the registry-driven emit() path. The writer handles redaction,
83
+ // --capture-exclude gating, and local+GCS fanout internally.
84
+ //
85
+ // Awaited (not fire-and-forget) so the write is observable by the
86
+ // orchestrator's caller — a fire-and-forget let the emit fall through
87
+ // to runtime teardown in tests with aggressive afterEach cleanup.
88
+ // `emit` is non-blocking internally (P5): failures return null + warn,
89
+ // never throw, so awaiting can't surface a rejected promise either.
90
+ try {
91
+ await ctx.artifactWriter.emit("pipelineContext", assoc(ctx), {
92
+ config: sanitized,
93
+ state: {
94
+ reportId: state.reportId,
95
+ evalFingerprint: state.evalFingerprint,
96
+ belowCritical: state.belowCritical,
97
+ remoteCacheHits: state.remoteCacheHits
98
+ ? [...state.remoteCacheHits]
99
+ : undefined,
100
+ releaseAutoScope: state.releaseAutoScope,
101
+ testSummary: state.testSummary,
102
+ },
103
+ steps: Object.entries(results).map(([name, result]) => ({
104
+ name,
105
+ status: result.status,
106
+ durationMs: result.status !== "skipped" ? result.durationMs : undefined,
107
+ })),
108
+ });
109
+ }
110
+ catch (err) {
111
+ ctx.logger.debug(`pipelineContext emit rejected: ${err instanceof Error ? err.message : String(err)}`);
112
+ }
100
113
  }
101
114
  /**
102
115
  * Flush captured artifacts to disk. Non-blocking — failures are logged
@@ -170,10 +183,10 @@ export async function orchestratePipeline(ctx, steps) {
170
183
  }, jobUpdates);
171
184
  }
172
185
  // Capture pipeline context and job updates before flushing
173
- capturePipelineContext(ctx, state, results);
174
- if (jobUpdates.length > 0) {
175
- ctx.collector.capture("job-store", "job-updates", jobUpdates);
176
- }
186
+ await capturePipelineContext(ctx, state, results);
187
+ // W0050 — `job-updates` was an observability-only capture not tied
188
+ // to a registered artifact type; dropped here. Use the JobStore
189
+ // path if job telemetry is needed.
177
190
  // Flush captured artifacts even on failure (partial capture is useful)
178
191
  await flushArtifacts(ctx);
179
192
  return {
@@ -229,11 +242,9 @@ export async function orchestratePipeline(ctx, steps) {
229
242
  ctx.logger.warn("Failed to report job completion — continuing");
230
243
  }
231
244
  }
232
- // Capture pipeline context and job updates before flushing
233
- capturePipelineContext(ctx, state, results);
234
- if (jobUpdates.length > 0) {
235
- ctx.collector.capture("job-store", "job-updates", jobUpdates);
236
- }
245
+ // Capture pipeline context. `job-updates` observability captures were
246
+ // dropped in Slice 6.1 — JobStore is the supported telemetry path.
247
+ await capturePipelineContext(ctx, state, results);
237
248
  // Flush captured artifacts (non-blocking — failures never affect pipeline result)
238
249
  await flushArtifacts(ctx);
239
250
  return {
@@ -4,7 +4,7 @@
4
4
  * Calls calculateAndWriteScores() from pipeline/calculate-scores.ts with
5
5
  * typed options derived from AppContext. No env bridge needed.
6
6
  */
7
- import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
7
+ import { type AppContext, type PipelineState, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
8
8
  export declare class CalculateScoresStep implements PipelineStep {
9
9
  readonly name = "calculate-scores";
10
10
  check(): ValidationIssue[];
@@ -6,6 +6,8 @@
6
6
  */
7
7
  import { existsSync, readFileSync } from "node:fs";
8
8
  import { join, resolve } from "path";
9
+ import { assoc, } from "../../_vendor/ailf-core/index.js";
10
+ import { emitFileContents } from "../../artifact-capture/emit-file.js";
9
11
  import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
10
12
  import { getStepInputPaths } from "../../pipeline/cache.js";
11
13
  import { buildCacheContext } from "../cache-context.js";
@@ -122,16 +124,15 @@ export class CalculateScoresStep {
122
124
  state.belowCritical = belowCritical;
123
125
  }
124
126
  // Capture score artifacts
127
+ // W0050 — score-summary → scoreSummary (run-scoped bulk).
128
+ // grader-judgments.json and test-results.json were aggregated captures
129
+ // without registered descriptors. graderJudgments is now per-entry
130
+ // ({run, mode, task, model, grader}) and lands via run-eval-step in
131
+ // Slice 6.6; the aggregated file is dropped.
125
132
  const resultsDir = join(ctx.config.rootDir, "results", "latest");
126
- for (const file of [
127
- "score-summary.json",
128
- "grader-judgments.json",
129
- "test-results.json",
130
- ]) {
131
- const filePath = join(resultsDir, file);
132
- if (existsSync(filePath)) {
133
- ctx.collector.captureFile("calculate-scores", file.replace(".json", ""), filePath);
134
- }
133
+ const summaryPath = join(resultsDir, "score-summary.json");
134
+ if (existsSync(summaryPath)) {
135
+ await emitFileContents(ctx.artifactWriter, "scoreSummary", assoc(ctx), summaryPath);
135
136
  }
136
137
  // Upload testOutputs to GCS (D0032 — non-blocking, P5).
137
138
  // Read from test-results.json rather than score-summary.json: the
@@ -142,16 +143,15 @@ export class CalculateScoresStep {
142
143
  // The full responseOutput lives in the GCS artifact; PublishReportStep
143
144
  // later strips it from the inline Content Lake document when this
144
145
  // upload succeeds.
145
- if (ctx.artifactWriter) {
146
- const testResults = tryReadTestResults(ctx.config.rootDir);
147
- if (testResults?.length) {
148
- const artifactRef = await uploadTestOutputs(ctx.artifactWriter, ctx.runId, testResults);
149
- if (artifactRef) {
150
- state.artifactRefs = {
151
- ...state.artifactRefs,
152
- testOutputs: artifactRef,
153
- };
154
- }
146
+ // W0050 — ctx.artifactWriter is always present; no guard needed.
147
+ const testResults = tryReadTestResults(ctx.config.rootDir);
148
+ if (testResults?.length) {
149
+ const artifactRef = await uploadTestOutputs(ctx.artifactWriter, ctx.runId, testResults);
150
+ if (artifactRef) {
151
+ state.artifactRefs = {
152
+ ...state.artifactRefs,
153
+ testOutputs: artifactRef,
154
+ };
155
155
  }
156
156
  }
157
157
  const criticalSuffix = belowCritical.length > 0
@@ -11,7 +11,7 @@
11
11
  * @see packages/eval/src/pipeline/callback-delivery.ts
12
12
  * @see docs/design-docs/api-service-gateway.md
13
13
  */
14
- import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
14
+ import { type AppContext, type PipelineState, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
15
15
  import { type CallbackConfig } from "../../pipeline/callback-delivery.js";
16
16
  export declare class CallbackStep implements PipelineStep {
17
17
  private readonly callback;
@@ -13,6 +13,7 @@
13
13
  */
14
14
  import { readFileSync } from "fs";
15
15
  import { resolve } from "path";
16
+ import { assoc, } from "../../_vendor/ailf-core/index.js";
16
17
  import { deliverCallback, } from "../../pipeline/callback-delivery.js";
17
18
  export class CallbackStep {
18
19
  callback;
@@ -58,11 +59,12 @@ export class CallbackStep {
58
59
  reportId: state.reportId,
59
60
  summary,
60
61
  };
61
- // Capture callback payload (Tier 2 — no secrets: headers are NOT captured)
62
- ctx.collector.capture("callback", "callback-payload", callbackPayload);
62
+ // W0050 — callbackRequest/callbackResponse are per-entry artifacts
63
+ // keyed by the callback target URL (the `name` slot on the association).
64
+ const callbackName = this.callback.url;
65
+ await ctx.artifactWriter.emit("callbackRequest", assoc(ctx, { name: callbackName }), callbackPayload);
63
66
  const result = await deliverCallback(this.callback, callbackPayload);
64
- // Capture callback response status (not the body — that's the user's system)
65
- ctx.collector.capture("callback", "callback-response", {
67
+ await ctx.artifactWriter.emit("callbackResponse", assoc(ctx, { name: callbackName }), {
66
68
  ok: result.ok,
67
69
  attempts: result.attempts,
68
70
  error: result.error,
@@ -5,7 +5,7 @@
5
5
  * inlined directly from the former pipeline/steps/compare-step.ts.
6
6
  * This is an optional step — failure doesn't stop the pipeline.
7
7
  */
8
- import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
8
+ import { type AppContext, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
9
9
  export declare class CompareStep implements PipelineStep {
10
10
  readonly name = "compare";
11
11
  readonly optional = true;
@@ -7,6 +7,8 @@
7
7
  */
8
8
  import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync, } from "fs";
9
9
  import { join, resolve } from "path";
10
+ import { assoc, } from "../../_vendor/ailf-core/index.js";
11
+ import { emitFileContents } from "../../artifact-capture/emit-file.js";
10
12
  import { compare } from "../../pipeline/compare.js";
11
13
  export class CompareStep {
12
14
  name = "compare";
@@ -69,8 +71,8 @@ export class CompareStep {
69
71
  mkdirSync(ctx.config.outputDir, { recursive: true });
70
72
  const reportPath = resolve(ctx.config.outputDir, "comparison-report.json");
71
73
  writeFileSync(reportPath, JSON.stringify(report, null, 2));
72
- // Capture comparison report
73
- ctx.collector.captureFile("compare", "comparison-report", reportPath);
74
+ // W0050 — comparisonReport is per-entry keyed by mode ({run, mode}).
75
+ await emitFileContents(ctx.artifactWriter, "comparisonReport", assoc(ctx, { mode: ctx.config.mode }), reportPath);
74
76
  // Build summary
75
77
  const improved = report.improved.length;
76
78
  const regressed = report.regressed.length;
@@ -4,7 +4,7 @@
4
4
  * Calls pure functions from pipeline/discovery-report.ts directly.
5
5
  * Optional step — failure doesn't stop the pipeline.
6
6
  */
7
- import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
7
+ import { type AppContext, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
8
8
  export declare class DiscoveryReportStep implements PipelineStep {
9
9
  readonly name = "discovery-report";
10
10
  readonly optional = true;
@@ -6,6 +6,8 @@
6
6
  */
7
7
  import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
8
8
  import { resolve } from "path";
9
+ import { assoc, } from "../../_vendor/ailf-core/index.js";
10
+ import { emitFileContents } from "../../artifact-capture/emit-file.js";
9
11
  import { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../../pipeline/discovery-report.js";
10
12
  export class DiscoveryReportStep {
11
13
  name = "discovery-report";
@@ -38,7 +40,8 @@ export class DiscoveryReportStep {
38
40
  mkdirSync(ctx.config.outputDir, { recursive: true });
39
41
  const discoveryPath = resolve(ctx.config.outputDir, "discovery-report.md");
40
42
  writeFileSync(discoveryPath, md);
41
- ctx.collector.captureFile("discovery-report", "discovery-report", discoveryPath);
43
+ // W0050 — discoveryReport is per-entry keyed by mode.
44
+ await emitFileContents(ctx.artifactWriter, "discoveryReport", assoc(ctx, { mode: ctx.config.mode }), discoveryPath);
42
45
  console.log(md);
43
46
  const invisible = report.invisibleDocs.length;
44
47
  const f1 = report.overall.avgF1.toFixed(2);