@sanity/ailf 3.0.0 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/dist/_vendor/ailf-core/artifact-capture/association.d.ts +37 -0
  2. package/dist/_vendor/ailf-core/artifact-capture/association.js +19 -0
  3. package/dist/_vendor/ailf-core/index.d.ts +1 -1
  4. package/dist/_vendor/ailf-core/index.js +1 -1
  5. package/dist/_vendor/ailf-core/ports/context.d.ts +8 -0
  6. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  7. package/dist/_vendor/ailf-core/ports/index.js +1 -0
  8. package/dist/_vendor/ailf-core/ports/progress-reporter.d.ts +74 -0
  9. package/dist/_vendor/ailf-core/ports/progress-reporter.js +26 -0
  10. package/dist/_vendor/ailf-core/services/slim-report-summary.js +1 -16
  11. package/dist/adapters/progress/console-progress-reporter.d.ts +35 -0
  12. package/dist/adapters/progress/console-progress-reporter.js +110 -0
  13. package/dist/artifact-capture/api-gateway-artifact-writer.d.ts +8 -1
  14. package/dist/artifact-capture/api-gateway-artifact-writer.js +79 -42
  15. package/dist/artifact-capture/batching-api-gateway-artifact-writer.d.ts +108 -0
  16. package/dist/artifact-capture/batching-api-gateway-artifact-writer.js +492 -0
  17. package/dist/artifact-capture/fanout-artifact-writer.d.ts +14 -2
  18. package/dist/artifact-capture/fanout-artifact-writer.js +25 -4
  19. package/dist/artifact-capture/gcs-artifact-writer.d.ts +27 -1
  20. package/dist/artifact-capture/gcs-artifact-writer.js +168 -38
  21. package/dist/artifact-capture/instrumented-artifact-writer.d.ts +32 -0
  22. package/dist/artifact-capture/instrumented-artifact-writer.js +151 -0
  23. package/dist/artifact-capture/local-fs-artifact-writer.d.ts +8 -1
  24. package/dist/artifact-capture/local-fs-artifact-writer.js +23 -4
  25. package/dist/artifact-capture/parallel-emit.d.ts +43 -0
  26. package/dist/artifact-capture/parallel-emit.js +84 -0
  27. package/dist/artifact-capture/upload-metrics.d.ts +62 -0
  28. package/dist/artifact-capture/upload-metrics.js +125 -0
  29. package/dist/composition-root.d.ts +2 -2
  30. package/dist/composition-root.js +97 -11
  31. package/dist/orchestration/pipeline-orchestrator.js +97 -1
  32. package/dist/orchestration/steps/calculate-scores-step.js +9 -7
  33. package/dist/orchestration/steps/finalize-run-step.js +40 -8
  34. package/dist/pipeline/emit-eval-results.js +29 -11
  35. package/dist/pipeline/upload-test-outputs.d.ts +12 -5
  36. package/dist/pipeline/upload-test-outputs.js +27 -10
  37. package/package.json +1 -1
@@ -0,0 +1,43 @@
1
+ /**
2
+ * parallel-emit.ts — W0056 prototype A (client-side parallelism).
3
+ *
4
+ * Bounded-concurrency helper for fanning out artifact emits. The baseline
5
+ * measurement (see `docs/design-docs/artifact-upload-throughput.md`) shows
6
+ * producer loops call `await writer.emit(...)` serially, and per-artifact
7
+ * wall clock is dominated by GCS response latency. A simple `p-limit(N)`
8
+ * turns that into a batched-parallel flow against the existing writers.
9
+ *
10
+ * Gated on `AILF_PARALLEL_UPLOAD`:
11
+ * - unset → use the per-writer default set at composition time.
12
+ * - "0" → forced serial (override when default-on is undesirable).
13
+ * - "1" → parallel, default concurrency 8.
14
+ * - "<N>" (N > 1 integer) → parallel with concurrency N.
15
+ *
16
+ * The per-writer default is set by the composition root via
17
+ * `setDefaultUploadConcurrency`. Writers with measured safe parallelism
18
+ * (GCS direct) set 8; writers still on serial (API Gateway, until the
19
+ * batching rollout completes) leave it at the module default of 1.
20
+ */
21
+ /**
22
+ * Set the default concurrency used when `AILF_PARALLEL_UPLOAD` is unset.
23
+ * Composition root calls this once per run based on the selected remote
24
+ * writer. Tests reset by passing 1.
25
+ */
26
+ export declare function setDefaultUploadConcurrency(n: number): void;
27
+ /** Exposed for tests — returns the current module default. */
28
+ export declare function getDefaultUploadConcurrency(): number;
29
+ /**
30
+ * Resolve the configured concurrency. Returns 1 (serial) only when
31
+ * `AILF_PARALLEL_UPLOAD` is "0"; when the variable is unset or its value is
32
+ * not a usable integer, returns the per-writer module default instead.
33
+ */
34
+ export declare function resolveUploadConcurrency(): number;
35
+ /**
36
+ * Run `fn` against every item with at most `concurrency` active at once.
37
+ * Preserves input order in the result array. Rejections propagate — callers
38
+ * with non-blocking semantics should catch inside `fn`.
39
+ *
40
+ * When `concurrency <= 1`, runs strictly serially (drop-in equivalent of a
41
+ * `for … await` loop).
42
+ */
43
+ export declare function parallelMap<T, R>(items: readonly T[], concurrency: number, fn: (item: T, index: number) => Promise<R>): Promise<R[]>;
@@ -0,0 +1,84 @@
1
+ /**
2
+ * parallel-emit.ts — W0056 prototype A (client-side parallelism).
3
+ *
4
+ * Bounded-concurrency helper for fanning out artifact emits. The baseline
5
+ * measurement (see `docs/design-docs/artifact-upload-throughput.md`) shows
6
+ * producer loops call `await writer.emit(...)` serially, and per-artifact
7
+ * wall clock is dominated by GCS response latency. A simple `p-limit(N)`
8
+ * turns that into a batched-parallel flow against the existing writers.
9
+ *
10
+ * Gated on `AILF_PARALLEL_UPLOAD`:
11
+ * - unset → use the per-writer default set at composition time.
12
+ * - "0" → forced serial (override when default-on is undesirable).
13
+ * - "1" → parallel, default concurrency 8.
14
+ * - "<N>" (N > 1 integer) → parallel with concurrency N.
15
+ *
16
+ * The per-writer default is set by the composition root via
17
+ * `setDefaultUploadConcurrency`. Writers with measured safe parallelism
18
+ * (GCS direct) set 8; writers still on serial (API Gateway, until the
19
+ * batching rollout completes) leave it at the module default of 1.
20
+ */
21
/** Concurrency applied when `AILF_PARALLEL_UPLOAD` is "1" (parallel, no explicit width). */
const DEFAULT_CONCURRENCY = 8;
/** Fallback used when the env var is unset or unusable. Starts at 1 (serial). */
let moduleDefault = 1;
/**
 * Set the default concurrency used when `AILF_PARALLEL_UPLOAD` is unset.
 * Composition root calls this once per run based on the selected remote
 * writer. Tests reset by passing 1.
 *
 * The value is stored as a whole number: fractions are floored, and anything
 * that is not a finite number >= 1 (NaN, Infinity, 0, negatives) resets the
 * default to 1.
 *
 * @param n Requested default concurrency.
 */
export function setDefaultUploadConcurrency(n) {
    moduleDefault = Number.isFinite(n) && n >= 1 ? Math.floor(n) : 1;
}
/**
 * Exposed for tests.
 *
 * @returns The current module default.
 */
export function getDefaultUploadConcurrency() {
    return moduleDefault;
}
/**
 * Resolve the effective upload concurrency from `AILF_PARALLEL_UPLOAD`.
 *
 * - unset / blank      → the per-writer module default.
 * - "0"                → 1 (forced serial).
 * - "1"                → `DEFAULT_CONCURRENCY` (8).
 * - whole number N > 1 → N.
 * - anything else      → the per-writer module default.
 *
 * Surrounding whitespace is ignored and the value must consist of digits
 * only, so " 0 " still forces serial and "8abc" is treated as invalid rather
 * than silently read as 8.
 *
 * @returns A concurrency of at least 1.
 */
export function resolveUploadConcurrency() {
    const raw = (process.env.AILF_PARALLEL_UPLOAD ?? "").trim();
    // Unset, blank, or not a plain non-negative integer: use the writer default.
    if (raw === "" || !/^\d+$/.test(raw))
        return moduleDefault;
    const parsed = Number.parseInt(raw, 10);
    // Absurdly long digit strings overflow to Infinity; treat them as invalid.
    if (!Number.isFinite(parsed))
        return moduleDefault;
    if (parsed === 0)
        return 1;
    if (parsed === 1)
        return DEFAULT_CONCURRENCY;
    return parsed;
}
53
+ /**
54
+ * Run `fn` against every item with at most `concurrency` active at once.
55
+ * Preserves input order in the result array. Rejections propagate — callers
56
+ * with non-blocking semantics should catch inside `fn`.
57
+ *
58
+ * When `concurrency <= 1`, runs strictly serially (drop-in equivalent of a
59
+ * `for … await` loop).
60
+ */
61
+ export async function parallelMap(items, concurrency, fn) {
62
+ if (items.length === 0)
63
+ return [];
64
+ if (concurrency <= 1) {
65
+ const out = [];
66
+ for (let i = 0; i < items.length; i++) {
67
+ out.push(await fn(items[i], i));
68
+ }
69
+ return out;
70
+ }
71
+ const results = new Array(items.length);
72
+ let cursor = 0;
73
+ async function worker() {
74
+ while (true) {
75
+ const i = cursor++;
76
+ if (i >= items.length)
77
+ return;
78
+ results[i] = await fn(items[i], i);
79
+ }
80
+ }
81
+ const width = Math.min(concurrency, items.length);
82
+ await Promise.all(Array.from({ length: width }, () => worker()));
83
+ return results;
84
+ }
@@ -0,0 +1,62 @@
1
+ /**
2
+ * UploadMetrics — spike instrumentation for W0056 (faster artifact upload).
3
+ *
4
+ * Captures per-operation timing on the artifact-upload path so the spike has
5
+ * a measured baseline: artifact count, total bytes, wall-clock, and the
6
+ * sign-RTT vs. PUT split. Gated on `AILF_UPLOAD_METRICS=1` in the composition
7
+ * root — a no-op when off.
8
+ *
9
+ * Design:
10
+ * - `UploadMetricsSink` is the narrow interface that writers depend on.
11
+ * - `UploadMetrics` is the in-process implementation that buffers events
12
+ * and emits both a stderr summary table and an NDJSON detail file.
13
+ * - `summarize()` is called by `InstrumentedArtifactWriter` once, after
14
+ * `writeManifest` succeeds (the natural end-of-run signal).
15
+ *
16
+ * This file is a spike deliverable — the API is intentionally ad hoc and
17
+ * may be promoted to `packages/core/src/ports/` if we ship anything.
18
+ */
19
+ import type { Logger } from "../_vendor/ailf-core/index.d.ts";
20
+ export type UploadPhase = "sign" | "put" | "compose" | "emit" | "ndjson-part" | "manifest";
21
+ export interface UploadMetricEvent {
22
+ /** ISO timestamp the event was recorded. */
23
+ ts: string;
24
+ /** Phase being measured — writers record `sign`/`put`/`compose` at the call site; the decorator records `emit`/`ndjson-part`/`manifest` end-to-end. */
25
+ phase: UploadPhase;
26
+ /** Writer class that produced the event (e.g. "ApiGatewayArtifactWriter"). */
27
+ writer: string;
28
+ /** Artifact type (or `"manifest"`). */
29
+ type: string;
30
+ /** Wall-clock for the phase, in milliseconds. */
31
+ ms: number;
32
+ /** Body size in bytes, when applicable. */
33
+ bytes?: number;
34
+ /** True when the underlying call resolved without throwing / without a non-2xx response. */
35
+ success: boolean;
36
+ }
37
+ export interface UploadMetricsSink {
38
+ record(event: Omit<UploadMetricEvent, "ts">): void;
39
+ }
40
+ /**
41
+ * No-op sink — writers default to this when metrics are off, so the
42
+ * instrumentation call sites remain uniform whether or not the collector is
43
+ * active.
44
+ */
45
+ export declare const NO_OP_UPLOAD_METRICS: UploadMetricsSink;
46
+ export interface UploadMetricsOptions {
47
+ /** Logger used for the summary table. */
48
+ logger: Logger;
49
+ /** Absolute path where the NDJSON detail file is written. Skipped when undefined. */
50
+ detailFile?: string;
51
+ }
52
+ export declare class UploadMetrics implements UploadMetricsSink {
53
+ private readonly options;
54
+ private readonly events;
55
+ private summarized;
56
+ constructor(options: UploadMetricsOptions);
57
+ record(event: Omit<UploadMetricEvent, "ts">): void;
58
+ summarize(): Promise<void>;
59
+ /** Exposed for tests — returns a copy. */
60
+ snapshot(): readonly UploadMetricEvent[];
61
+ }
62
+ export declare function buildSummaryTable(events: readonly UploadMetricEvent[]): string;
@@ -0,0 +1,125 @@
1
+ /**
2
+ * UploadMetrics — spike instrumentation for W0056 (faster artifact upload).
3
+ *
4
+ * Captures per-operation timing on the artifact-upload path so the spike has
5
+ * a measured baseline: artifact count, total bytes, wall-clock, and the
6
+ * sign-RTT vs. PUT split. Gated on `AILF_UPLOAD_METRICS=1` in the composition
7
+ * root — a no-op when off.
8
+ *
9
+ * Design:
10
+ * - `UploadMetricsSink` is the narrow interface that writers depend on.
11
+ * - `UploadMetrics` is the in-process implementation that buffers events
12
+ * and emits both a stderr summary table and an NDJSON detail file.
13
+ * - `summarize()` is called by `InstrumentedArtifactWriter` once, after
14
+ * `writeManifest` succeeds (the natural end-of-run signal).
15
+ *
16
+ * This file is a spike deliverable — the API is intentionally ad hoc and
17
+ * may be promoted to `packages/core/src/ports/` if we ship anything.
18
+ */
19
+ import { mkdir, writeFile } from "node:fs/promises";
20
+ import { dirname } from "node:path";
21
+ // ---------------------------------------------------------------------------
22
+ // Implementation
23
+ // ---------------------------------------------------------------------------
24
+ /**
25
+ * No-op sink — writers default to this when metrics are off, so the
26
+ * instrumentation call sites remain uniform whether or not the collector is
27
+ * active.
28
+ */
29
+ export const NO_OP_UPLOAD_METRICS = {
30
+ record() { },
31
+ };
32
/**
 * In-process metrics collector: buffers every recorded event in memory and,
 * on `summarize()`, logs an aggregate table and optionally writes the raw
 * events to an NDJSON file.
 */
export class UploadMetrics {
    options;
    events = [];
    summarized = false;
    /**
     * @param options `logger` receives the summary; `detailFile`, when set,
     *   is the path the NDJSON detail is written to.
     */
    constructor(options) {
        this.options = options;
    }
    /** Buffer one event, stamping it with the current time as an ISO string. */
    record(event) {
        const stamped = { ...event, ts: new Date().toISOString() };
        this.events.push(stamped);
    }
    /**
     * Emit the summary. Only the first call does anything; later calls return
     * immediately. A failure to write the detail file is logged as a warning
     * and never thrown.
     */
    async summarize() {
        if (this.summarized)
            return;
        this.summarized = true;
        const { logger, detailFile } = this.options;
        const eventCount = this.events.length;
        if (eventCount === 0) {
            logger.info("[upload-metrics] no events recorded");
            return;
        }
        const table = buildSummaryTable(this.events);
        logger.info(`[upload-metrics] ${eventCount} events recorded\n${table}`);
        if (!detailFile)
            return;
        try {
            await mkdir(dirname(detailFile), { recursive: true });
            // One JSON document per line, with a trailing newline.
            const lines = this.events.map((e) => JSON.stringify(e));
            await writeFile(detailFile, `${lines.join("\n")}\n`, "utf-8");
            logger.info(`[upload-metrics] detail written to ${detailFile}`);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            logger.warn(`[upload-metrics] failed to write detail file "${detailFile}": ${message}`);
        }
    }
    /** Exposed for tests — returns a shallow copy of the buffered events. */
    snapshot() {
        return this.events.slice();
    }
}
71
/**
 * Render recorded events as a plain-text table with one row per
 * (phase, writer) pair, ordered by total time spent, largest first.
 *
 * @param events Events to aggregate.
 * @returns Header line, separator line and one line per group, joined by "\n".
 */
export function buildSummaryTable(events) {
    // Group by phase + writer; Map keeps first-seen order for sort ties.
    const groups = new Map();
    for (const event of events) {
        const groupKey = `${event.phase}\t${event.writer}`;
        const members = groups.get(groupKey);
        if (members) {
            members.push(event);
        }
        else {
            groups.set(groupKey, [event]);
        }
    }
    const rows = Array.from(groups, ([groupKey, members]) => {
        // Ascending durations feed both the percentiles and the max.
        const durations = members.map((e) => e.ms).sort((a, b) => a - b);
        let totalMs = 0;
        for (const ms of durations) {
            totalMs += ms;
        }
        let totalBytes = 0;
        let failures = 0;
        for (const e of members) {
            totalBytes += e.bytes ?? 0;
            if (!e.success) {
                failures += 1;
            }
        }
        return {
            phase: groupKey.split("\t").join(" · "),
            count: members.length,
            failures,
            totalMs,
            totalBytes,
            p50: percentile(durations, 0.5),
            p95: percentile(durations, 0.95),
            max: durations[durations.length - 1] ?? 0,
        };
    });
    rows.sort((a, b) => b.totalMs - a.totalMs);
    const header = "phase | n | fail | bytes | total ms | p50 | p95 | max";
    const sep = "-----------------------------------------------+-----+------+-------------+----------+-----+-----+-----";
    const lines = rows.map((r) => [
        pad(r.phase, 47),
        pad(String(r.count), 4),
        pad(String(r.failures), 5),
        pad(formatBytes(r.totalBytes), 12),
        pad(String(Math.round(r.totalMs)), 9),
        pad(String(Math.round(r.p50)), 4),
        pad(String(Math.round(r.p95)), 4),
        String(Math.round(r.max)),
    ].join("| "));
    return [header, sep, lines.join("\n")].join("\n");
}
108
/**
 * Nearest-rank percentile of an ascending-sorted list.
 *
 * @param sorted Values in ascending order.
 * @param p Fraction in [0, 1], e.g. 0.95.
 * @returns The value at rank `ceil(length * p)`, clamped to the list bounds;
 *   0 for an empty list.
 */
function percentile(sorted, p) {
    const n = sorted.length;
    if (n === 0)
        return 0;
    const rank = Math.ceil(n * p) - 1;
    // Clamp into [0, n - 1].
    const idx = rank < 0 ? 0 : rank > n - 1 ? n - 1 : rank;
    return sorted[idx] ?? 0;
}
114
/**
 * Left-align `s` in a column of `width` characters. Text that already fills
 * or overflows the column is returned with a single trailing space so it
 * never touches the next column separator.
 */
function pad(s, width) {
    if (s.length < width) {
        return s.padEnd(width, " ");
    }
    return `${s} `;
}
117
/**
 * Format a byte count using binary (1024-based) units: whole bytes below
 * 1 KB, one decimal for KB and MB, two decimals for GB.
 */
function formatBytes(n) {
    const KB = 1024;
    const MB = 1024 * 1024;
    const GB = 1024 * 1024 * 1024;
    if (n >= GB) {
        return `${(n / 1024 / 1024 / 1024).toFixed(2)} GB`;
    }
    if (n >= MB) {
        return `${(n / 1024 / 1024).toFixed(1)} MB`;
    }
    if (n >= KB) {
        return `${(n / 1024).toFixed(1)} KB`;
    }
    return `${n} B`;
}
@@ -15,7 +15,7 @@
15
15
  * @see packages/core/src/ports/context.ts — AppContext interface
16
16
  * @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
17
17
  */
18
- import { type AppContext, type ArtifactWriter, type AssertionRegistration, type Logger, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
18
+ import { type AppContext, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssertionRegistration, type Logger, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
19
19
  /**
20
20
  * Create a fully wired AppContext from resolved configuration.
21
21
  *
@@ -41,7 +41,7 @@ export declare function createAppContext(config: ResolvedConfig): AppContext;
41
41
  *
42
42
  * Exported for unit-test access; not part of the public package API.
43
43
  */
44
- export declare function createArtifactWriter(config: ResolvedConfig, logger: Logger): ArtifactWriter;
44
+ export declare function createArtifactWriter(config: ResolvedConfig, logger: Logger, progress?: ArtifactWriterProgressOptions): ArtifactWriter;
45
45
  /**
46
46
  * Generic Promptfoo assertion types available to all evaluation modes.
47
47
  *
@@ -15,17 +15,22 @@
15
15
  * @see packages/core/src/ports/context.ts — AppContext interface
16
16
  * @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
17
17
  */
18
- import { InMemoryPluginRegistry, NoOpArtifactWriter, generateRunId, isArtifactType, } from "./_vendor/ailf-core/index.js";
18
+ import { ARTIFACT_EXPORT_PHASE_ID, InMemoryPluginRegistry, NoOpArtifactWriter, NoOpProgressReporter, generateRunId, isArtifactType, } from "./_vendor/ailf-core/index.js";
19
19
  import { AccumulatingArtifactWriter } from "./artifact-capture/accumulating-artifact-writer.js";
20
20
  import { ApiGatewayArtifactWriter } from "./artifact-capture/api-gateway-artifact-writer.js";
21
+ import { BatchingApiGatewayArtifactWriter } from "./artifact-capture/batching-api-gateway-artifact-writer.js";
21
22
  import { FanoutArtifactWriter } from "./artifact-capture/fanout-artifact-writer.js";
22
23
  import { GcsArtifactWriter } from "./artifact-capture/gcs-artifact-writer.js";
24
+ import { InstrumentedArtifactWriter } from "./artifact-capture/instrumented-artifact-writer.js";
23
25
  import { LocalFilesystemArtifactWriter } from "./artifact-capture/local-fs-artifact-writer.js";
26
+ import { resolveUploadConcurrency, setDefaultUploadConcurrency, } from "./artifact-capture/parallel-emit.js";
27
+ import { UploadMetrics } from "./artifact-capture/upload-metrics.js";
24
28
  import { ContentLakeCacheAdapter } from "./adapters/cache/content-lake-cache.js";
25
29
  import { loadExternalPresets } from "./pipeline/compiler/preset-loader.js";
26
30
  import { FilesystemCache } from "./adapters/cache/filesystem-cache.js";
27
31
  import { PromptfooEvalAdapter } from "./adapters/eval-runners/promptfoo-eval-adapter.js";
28
32
  import { ConsoleLogger, JsonLogger, QuietLogger, } from "./adapters/loggers/index.js";
33
+ import { ConsoleProgressReporter } from "./adapters/progress/console-progress-reporter.js";
29
34
  import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource, } from "./adapters/task-sources/index.js";
30
35
  import { createAgentHarnessBase, createKnowledgeProbeBase, createLiteracyModeBase, createMcpServerModeBase, } from "./pipeline/compiler/mode-bases/index.js";
31
36
  import { createSanityLiteracyPreset } from "./pipeline/compiler/presets/index.js";
@@ -41,6 +46,9 @@ import { loadSinks } from "./sinks/index.js";
41
46
  export function createAppContext(config) {
42
47
  // Logger — selected by env var preferences
43
48
  const logger = createLogger();
49
+ // Progress reporter — console-backed for the default logger; no-op for
50
+ // JSON/quiet modes and tests where interactive output is inappropriate.
51
+ const progress = createProgressReporter();
44
52
  // Cache — filesystem, optionally decorated with Content Lake fallback
45
53
  const cache = config.noCache ? undefined : createCache(config);
46
54
  // Task source — selected by config.taskSourceType
@@ -64,7 +72,12 @@ export function createAppContext(config) {
64
72
  // `runs/{runId}/…` paths (D0032). Auto-detects the right adapter from
65
73
  // available credentials; defaults bucket to "ailf-artifacts". Set
66
74
  // artifactUpload: false to opt out entirely.
67
- const artifactWriter = createArtifactWriter(config, logger);
75
+ // W0053 writers receive a progress reporter scoped to a single
76
+ // `artifact-export` phase so the CLI can render per-batch updates.
77
+ const artifactWriter = createArtifactWriter(config, logger, {
78
+ reporter: progress,
79
+ phaseId: ARTIFACT_EXPORT_PHASE_ID,
80
+ });
68
81
  // Generate the pipeline's RunId once; every downstream step reads it
69
82
  // from the context (D0032).
70
83
  const runId = generateRunId();
@@ -76,6 +89,7 @@ export function createAppContext(config) {
76
89
  docFetcher,
77
90
  evalRunner,
78
91
  logger,
92
+ progress,
79
93
  registry,
80
94
  reportStore,
81
95
  runId,
@@ -98,6 +112,23 @@ function createLogger() {
98
112
  process.env.AILF_VERBOSE === "1",
99
113
  });
100
114
  }
115
/**
 * Choose the ProgressReporter adapter from the logging environment variables.
 *
 * A no-op reporter is returned when `AILF_LOG_FORMAT` is "json",
 * `AILF_LOG_LEVEL` is "quiet", or `AILF_QUIET` is "1", so machine-readable
 * and quiet output stay clean. Otherwise a console reporter is returned, with
 * `verbose` enabled when `AILF_LOG_LEVEL` is "verbose" or `AILF_VERBOSE` is
 * "1".
 */
function createProgressReporter() {
    const { AILF_LOG_FORMAT: format, AILF_LOG_LEVEL: level, AILF_QUIET: quiet, AILF_VERBOSE: verboseFlag, } = process.env;
    const silent = format === "json" || level === "quiet" || quiet === "1";
    if (silent) {
        return new NoOpProgressReporter();
    }
    const verbose = level === "verbose" || verboseFlag === "1";
    return new ConsoleProgressReporter({ verbose });
}
101
132
  /**
102
133
  * Shared GCS bucket for report artifacts. Matches the gateway default at
103
134
  * packages/api/src/routes/artifacts.ts — both sides assume ailf-artifacts
@@ -129,7 +160,7 @@ const DEFAULT_LOCAL_ARTIFACTS_DIR = ".ailf/results/captures";
129
160
  *
130
161
  * Exported for unit-test access; not part of the public package API.
131
162
  */
132
- export function createArtifactWriter(config, logger) {
163
+ export function createArtifactWriter(config, logger, progress) {
133
164
  // Legacy `artifactUpload: false` still disables — treat as an alias for
134
165
  // the canonical `artifactsDisabled: true` until W0052 removes it.
135
166
  if (config.artifactsDisabled === true || config.artifactUpload === false) {
@@ -138,10 +169,27 @@ export function createArtifactWriter(config, logger) {
138
169
  }
139
170
  const exclude = resolveExcludeList(config.artifactsExclude, logger);
140
171
  const rootDir = config.artifactsDir ?? DEFAULT_LOCAL_ARTIFACTS_DIR;
141
- const local = new LocalFilesystemArtifactWriter({ rootDir, exclude });
142
- const remote = createRemoteArtifactWriter(config, logger);
172
+ // W0056 opt-in measurement of the upload path. The collector is passed
173
+ // to the remote writer (where sign/PUT/compose phases live) AND wraps the
174
+ // final writer to record caller-observed `emit`/`writeManifest` totals.
175
+ // `summarize()` fires from the decorator's `writeManifest` hook.
176
+ const metrics = process.env.AILF_UPLOAD_METRICS === "1"
177
+ ? new UploadMetrics({
178
+ logger,
179
+ detailFile: `${rootDir}/upload-metrics/run-${Date.now()}.ndjson`,
180
+ })
181
+ : null;
182
+ // W0053: progress attaches to the OUTERMOST of (local-only | fanout). When
183
+ // fanout is wired, the delegates stay silent so we don't double-count the
184
+ // same caller-visible write across two backends.
185
+ const remote = createRemoteArtifactWriter(config, logger, metrics);
186
+ const local = new LocalFilesystemArtifactWriter({
187
+ rootDir,
188
+ exclude,
189
+ ...(remote ? {} : { progress }),
190
+ });
143
191
  const base = remote
144
- ? new FanoutArtifactWriter([local, remote])
192
+ ? new FanoutArtifactWriter([local, remote], { progress })
145
193
  : local;
146
194
  if (!remote) {
147
195
  logger.debug(`Artifact writer: LocalFilesystemArtifactWriter only (rootDir=${rootDir})`);
@@ -153,7 +201,10 @@ export function createArtifactWriter(config, logger) {
153
201
  // RunManifest without each producer bookkeeping its own ArtifactRefs
154
202
  // (W0051 Slice 3 revisit — Option B of the "manifest empty on real runs"
155
203
  // fix).
156
- return new AccumulatingArtifactWriter(base);
204
+ const accumulating = new AccumulatingArtifactWriter(base);
205
+ return metrics
206
+ ? new InstrumentedArtifactWriter(accumulating, metrics)
207
+ : accumulating;
157
208
  }
158
209
  /**
159
210
  * Validate the exclude list against the registry. Unknown types are dropped
@@ -179,19 +230,54 @@ function resolveExcludeList(raw, logger) {
179
230
  * the sole backend for that run, which is the D0033 M4 default for laptops
180
231
  * and CI without GCS creds.
181
232
  */
182
- function createRemoteArtifactWriter(config, logger) {
233
+ function createRemoteArtifactWriter(config, logger, metrics) {
183
234
  const bucket = config.artifactGcsBucket ?? DEFAULT_ARTIFACT_BUCKET;
184
235
  const hasGcsCredentials = Boolean(process.env.GOOGLE_APPLICATION_CREDENTIALS || process.env.GCLOUD_PROJECT);
185
236
  if (hasGcsCredentials) {
186
- logger.debug(`Artifact remote backend: GcsArtifactWriter (ADC, bucket=${bucket})`);
187
- return new GcsArtifactWriter({ bucket });
237
+ // W0056 Phase 1: the GCS-direct path measured 0 failures at
238
+ // concurrency 8 with a 60 % pipeline-time reduction. Flip parallelism
239
+ // on by default on this path. `AILF_PARALLEL_UPLOAD=0` still forces
240
+ // serial as an escape hatch.
241
+ setDefaultUploadConcurrency(8);
242
+ logger.debug(`Artifact remote backend: GcsArtifactWriter (ADC, bucket=${bucket}, defaultConcurrency=8)`);
243
+ return new GcsArtifactWriter({
244
+ bucket,
245
+ ...(metrics ? { metrics } : {}),
246
+ });
188
247
  }
189
248
  if (config.apiKey && config.apiUrl) {
190
- logger.debug(`Artifact remote backend: ApiGatewayArtifactWriter (via ${config.apiUrl}, bucket=${bucket})`);
249
+ // W0058 Phase 2: batching writer is the default on the API Gateway path.
250
+ // Prototype B (W0056) showed batch signing + client-side parallelism
251
+ // eliminates the 429 storm that single-URL parallelism triggered on the
252
+ // Vercel signing endpoint, at parity with the GCS-direct parallel path
253
+ // once the sign+PUT overlap optimization lands. Flip the default to 8
254
+ // concurrency; `AILF_PARALLEL_UPLOAD=0` forces serial as a rollback
255
+ // escape hatch and auto-selects the legacy single-URL writer.
256
+ setDefaultUploadConcurrency(8);
257
+ const concurrency = resolveUploadConcurrency();
258
+ if (concurrency > 1) {
259
+ logger.debug(`Artifact remote backend: BatchingApiGatewayArtifactWriter (via ${config.apiUrl}, bucket=${bucket}, putConcurrency=${concurrency})`);
260
+ // D0034: neither API Gateway writer supports NDJSON `appendNdjson`.
261
+ // Traces that flow through `appendNdjson` are dropped on this path.
262
+ // Surface the gap once at startup instead of ambushing users with a
263
+ // silent null ref at emit time.
264
+ logger.warn("Artifacts: API Gateway path selected without GCS ADC — " +
265
+ "trace (NDJSON) artifacts will be skipped (D0034). Set " +
266
+ "GOOGLE_APPLICATION_CREDENTIALS or GCLOUD_PROJECT to capture traces.");
267
+ return new BatchingApiGatewayArtifactWriter({
268
+ apiBaseUrl: config.apiUrl,
269
+ apiKey: config.apiKey,
270
+ bucket,
271
+ putConcurrency: concurrency,
272
+ ...(metrics ? { metrics } : {}),
273
+ });
274
+ }
275
+ logger.debug(`Artifact remote backend: ApiGatewayArtifactWriter (via ${config.apiUrl}, bucket=${bucket}, serial — AILF_PARALLEL_UPLOAD=0 override)`);
191
276
  return new ApiGatewayArtifactWriter({
192
277
  apiBaseUrl: config.apiUrl,
193
278
  apiKey: config.apiKey,
194
279
  bucket,
280
+ ...(metrics ? { metrics } : {}),
195
281
  });
196
282
  }
197
283
  return null;
@@ -11,7 +11,7 @@
11
11
  * each step completes. This enables the GET /v1/jobs/:jobId polling
12
12
  * endpoint to show real-time progress.
13
13
  */
14
- import { assoc, } from "../_vendor/ailf-core/index.js";
14
+ import { ARTIFACT_EXPORT_PHASE_ID, assoc, } from "../_vendor/ailf-core/index.js";
15
15
  import { runStep } from "./step-runner.js";
16
16
  // ---------------------------------------------------------------------------
17
17
  // Job progress reporter
@@ -142,10 +142,16 @@ export async function orchestratePipeline(ctx, steps) {
142
142
  if (hasJob) {
143
143
  await reportJobProgress(ctx, steps[0]?.name ?? "init", 0, steps.length, "running", undefined, jobUpdates);
144
144
  }
145
+ // W0053 — artifact export phase. Opens the first time a non-`run-eval`
146
+ // step starts, signalling the user that promptfoo's progress bar is done
147
+ // and the (previously silent) GCS export/upload window is now active.
148
+ // Closed by the `exportPhase.close()` calls placed before each `return` below (not a `finally`, so a throw escaping this function would skip the close).
149
+ const exportPhase = createExportPhaseGate(ctx);
145
150
  for (let i = 0; i < steps.length; i++) {
146
151
  const step = steps[i];
147
152
  ctx.logger.debug(`Starting step ${i + 1}/${steps.length}: ${step.name}`);
148
153
  ctx.logger.section(step.name);
154
+ exportPhase.maybeOpen(step.name);
149
155
  // Report current step progress
150
156
  if (hasJob) {
151
157
  await reportJobProgress(ctx, step.name, i, steps.length, "running", undefined, jobUpdates);
@@ -171,6 +177,7 @@ export async function orchestratePipeline(ctx, steps) {
171
177
  // observability-only capture not tied to a registered artifact type;
172
178
  // dropped in W0050. Use the JobStore path for job telemetry.
173
179
  await capturePipelineContext(ctx, state, results);
180
+ exportPhase.close();
174
181
  return {
175
182
  belowCritical: state.belowCritical,
176
183
  durationMs: Date.now() - pipelineStart,
@@ -227,6 +234,7 @@ export async function orchestratePipeline(ctx, steps) {
227
234
  // Capture pipeline context. `job-updates` observability captures were
228
235
  // dropped in Slice 6.1 — JobStore is the supported telemetry path.
229
236
  await capturePipelineContext(ctx, state, results);
237
+ exportPhase.close();
230
238
  return {
231
239
  belowCritical: state.belowCritical,
232
240
  durationMs,
@@ -237,3 +245,91 @@ export async function orchestratePipeline(ctx, steps) {
237
245
  validation,
238
246
  };
239
247
  }
248
+ // ---------------------------------------------------------------------------
249
+ // Artifact export phase gate (W0053)
250
+ // ---------------------------------------------------------------------------
251
/**
 * Build a one-shot gate around the `artifact-export` progress phase.
 *
 * `maybeOpen(stepName)` starts the phase the first time it sees a step that
 * is neither a `run-eval*` step nor one of the known pre-export steps —
 * typically `calculate-scores`, once promptfoo has handed back control.
 * `close()` completes the phase if it was opened. Both are idempotent:
 * repeated calls after the first effective one do nothing.
 *
 * The gate keys on step names rather than timestamps so the header appears
 * exactly when the post-eval section begins. `run-eval` is excluded because
 * its progress is owned by promptfoo; opening earlier would create a second
 * progress channel for the same window.
 */
function createExportPhaseGate(ctx) {
    // Steps that run before the artifact-heavy post-eval section.
    const PRE_EXPORT_STEPS = [
        "validate",
        "mirror-repo-tasks",
        "fetch-docs",
        "generate-configs",
        "grader-consistency",
    ];
    const staysClosedFor = (stepName) => stepName.startsWith("run-eval") || PRE_EXPORT_STEPS.includes(stepName);
    const { label, detail } = describeExportPhase(ctx);
    // Lifecycle: "pending" → "open" → "closed"; never moves backwards.
    let lifecycle = "pending";
    let openedAtMs = 0;
    const maybeOpen = (stepName) => {
        if (lifecycle !== "pending" || staysClosedFor(stepName))
            return;
        lifecycle = "open";
        openedAtMs = Date.now();
        ctx.progress.phaseStart({
            phaseId: ARTIFACT_EXPORT_PHASE_ID,
            label,
            detail,
            startedAt: openedAtMs,
        });
    };
    const close = () => {
        if (lifecycle !== "open")
            return;
        lifecycle = "closed";
        // The orchestrator keeps no running totals, so it reports zero items
        // and bytes here. Reporter adapters that render a final summary rely
        // on the counts they accumulated from phaseProgress events.
        ctx.progress.phaseComplete({
            phaseId: ARTIFACT_EXPORT_PHASE_ID,
            itemsCompleted: 0,
            bytesCompleted: 0,
            durationMs: Date.now() - openedAtMs,
        });
    };
    return { maybeOpen, close };
}
312
+ /**
313
+ * Build the user-facing phase label by peeking at the wired writer chain.
314
+ * `AccumulatingArtifactWriter` wraps a `FanoutArtifactWriter([local, remote])`
315
+ * when remote credentials are present, or a bare `LocalFilesystemArtifactWriter`
316
+ * otherwise — naming the destination in the label keeps every progress line
317
+ * self-describing.
318
+ */
319
+ function describeExportPhase(ctx) {
320
+ const writer = ctx.artifactWriter;
321
+ const inner = writer.inner?.constructor.name ?? writer.constructor.name;
322
+ if (inner === "FanoutArtifactWriter") {
323
+ return { label: "Exporting run artifacts", detail: "local + GCS" };
324
+ }
325
+ if (inner === "GcsArtifactWriter") {
326
+ return { label: "Exporting run artifacts", detail: "GCS" };
327
+ }
328
+ if (inner === "ApiGatewayArtifactWriter") {
329
+ return { label: "Exporting run artifacts", detail: "API gateway" };
330
+ }
331
+ if (inner === "NoOpArtifactWriter") {
332
+ return { label: "Finalizing run" };
333
+ }
334
+ return { label: "Exporting run artifacts", detail: "local" };
335
+ }