@sanity/ailf 3.0.0 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/artifact-capture/association.d.ts +37 -0
- package/dist/_vendor/ailf-core/artifact-capture/association.js +19 -0
- package/dist/_vendor/ailf-core/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/index.js +1 -1
- package/dist/_vendor/ailf-core/ports/context.d.ts +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/index.js +1 -0
- package/dist/_vendor/ailf-core/ports/progress-reporter.d.ts +74 -0
- package/dist/_vendor/ailf-core/ports/progress-reporter.js +26 -0
- package/dist/_vendor/ailf-core/services/slim-report-summary.js +1 -16
- package/dist/adapters/progress/console-progress-reporter.d.ts +35 -0
- package/dist/adapters/progress/console-progress-reporter.js +110 -0
- package/dist/artifact-capture/api-gateway-artifact-writer.d.ts +8 -1
- package/dist/artifact-capture/api-gateway-artifact-writer.js +79 -42
- package/dist/artifact-capture/batching-api-gateway-artifact-writer.d.ts +108 -0
- package/dist/artifact-capture/batching-api-gateway-artifact-writer.js +492 -0
- package/dist/artifact-capture/fanout-artifact-writer.d.ts +14 -2
- package/dist/artifact-capture/fanout-artifact-writer.js +25 -4
- package/dist/artifact-capture/gcs-artifact-writer.d.ts +27 -1
- package/dist/artifact-capture/gcs-artifact-writer.js +168 -38
- package/dist/artifact-capture/instrumented-artifact-writer.d.ts +32 -0
- package/dist/artifact-capture/instrumented-artifact-writer.js +151 -0
- package/dist/artifact-capture/local-fs-artifact-writer.d.ts +8 -1
- package/dist/artifact-capture/local-fs-artifact-writer.js +23 -4
- package/dist/artifact-capture/parallel-emit.d.ts +43 -0
- package/dist/artifact-capture/parallel-emit.js +84 -0
- package/dist/artifact-capture/upload-metrics.d.ts +62 -0
- package/dist/artifact-capture/upload-metrics.js +125 -0
- package/dist/composition-root.d.ts +2 -2
- package/dist/composition-root.js +97 -11
- package/dist/orchestration/pipeline-orchestrator.js +97 -1
- package/dist/orchestration/steps/calculate-scores-step.js +9 -7
- package/dist/orchestration/steps/finalize-run-step.js +40 -8
- package/dist/pipeline/emit-eval-results.js +29 -11
- package/dist/pipeline/upload-test-outputs.d.ts +12 -5
- package/dist/pipeline/upload-test-outputs.js +27 -10
- package/package.json +1 -1
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* parallel-emit.ts — W0056 prototype A (client-side parallelism).
|
|
3
|
+
*
|
|
4
|
+
* Bounded-concurrency helper for fanning out artifact emits. The baseline
|
|
5
|
+
* measurement (see `docs/design-docs/artifact-upload-throughput.md`) shows
|
|
6
|
+
* producer loops call `await writer.emit(...)` serially, and per-artifact
|
|
7
|
+
* wall clock is dominated by GCS response latency. A simple `p-limit(N)`
|
|
8
|
+
* turns that into a batched-parallel flow against the existing writers.
|
|
9
|
+
*
|
|
10
|
+
* Gated on `AILF_PARALLEL_UPLOAD`:
|
|
11
|
+
* - unset → use the per-writer default set at composition time.
|
|
12
|
+
* - "0" → forced serial (override when default-on is undesirable).
|
|
13
|
+
* - "1" → parallel, default concurrency 8.
|
|
14
|
+
* - "<N>" (N > 1 integer) → parallel with concurrency N.
|
|
15
|
+
*
|
|
16
|
+
* The per-writer default is set by the composition root via
|
|
17
|
+
* `setDefaultUploadConcurrency`. Writers with measured safe parallelism
|
|
18
|
+
* (GCS direct, and the API Gateway path now that batching is its default)
|
|
19
|
+
* set 8; when no remote writer is selected the module default of 1 applies.
|
|
20
|
+
*/
|
|
21
|
+
/**
|
|
22
|
+
* Set the default concurrency used when `AILF_PARALLEL_UPLOAD` is unset.
|
|
23
|
+
* Composition root calls this once per run based on the selected remote
|
|
24
|
+
* writer. Tests reset by passing 1. Values below 1 (or NaN) are stored as 1.
|
|
25
|
+
*/
|
|
26
|
+
export declare function setDefaultUploadConcurrency(n: number): void;
|
|
27
|
+
/** Exposed for tests — returns the current module default (1 until `setDefaultUploadConcurrency` changes it). */
|
|
28
|
+
export declare function getDefaultUploadConcurrency(): number;
|
|
29
|
+
/**
|
|
30
|
+
* Resolve the configured concurrency from `AILF_PARALLEL_UPLOAD`. Returns 1
|
|
31
|
+
* (serial) for "0", 8 for "1", and N when the value parses to an integer N > 1.
|
|
32
|
+
* When the variable is unset or holds anything else, returns the per-writer module default.
|
|
33
|
+
*/
|
|
34
|
+
export declare function resolveUploadConcurrency(): number;
|
|
35
|
+
/**
|
|
36
|
+
* Run `fn` against every item with at most `concurrency` active at once.
|
|
37
|
+
* Preserves input order in the result array. Rejections propagate — callers
|
|
38
|
+
* with non-blocking semantics should catch inside `fn`.
|
|
39
|
+
*
|
|
40
|
+
* When `concurrency <= 1`, runs strictly serially (drop-in equivalent of a
|
|
41
|
+
* `for … await` loop). Empty input resolves to `[]` without calling `fn`.
|
|
42
|
+
*/
|
|
43
|
+
export declare function parallelMap<T, R>(items: readonly T[], concurrency: number, fn: (item: T, index: number) => Promise<R>): Promise<R[]>;
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* parallel-emit.ts — W0056 prototype A (client-side parallelism).
|
|
3
|
+
*
|
|
4
|
+
* Bounded-concurrency helper for fanning out artifact emits. The baseline
|
|
5
|
+
* measurement (see `docs/design-docs/artifact-upload-throughput.md`) shows
|
|
6
|
+
* producer loops call `await writer.emit(...)` serially, and per-artifact
|
|
7
|
+
* wall clock is dominated by GCS response latency. A simple `p-limit(N)`
|
|
8
|
+
* turns that into a batched-parallel flow against the existing writers.
|
|
9
|
+
*
|
|
10
|
+
* Gated on `AILF_PARALLEL_UPLOAD`:
|
|
11
|
+
* - unset → use the per-writer default set at composition time.
|
|
12
|
+
* - "0" → forced serial (override when default-on is undesirable).
|
|
13
|
+
* - "1" → parallel, default concurrency 8.
|
|
14
|
+
* - "<N>" (N > 1 integer) → parallel with concurrency N.
|
|
15
|
+
*
|
|
16
|
+
* The per-writer default is set by the composition root via
|
|
17
|
+
* `setDefaultUploadConcurrency`. Writers with measured safe parallelism
|
|
18
|
+
* (GCS direct, and the API Gateway path now that batching is its default)
|
|
19
|
+
* set 8; when no remote writer is selected the module default of 1 applies.
|
|
20
|
+
*/
|
|
21
|
+
/** Width used when `AILF_PARALLEL_UPLOAD` is "1" (parallel, no explicit width). */
const DEFAULT_CONCURRENCY = 8;
/** Per-writer default; stays serial (1) until the composition root raises it. */
let moduleDefault = 1;
/**
 * Set the default concurrency used when `AILF_PARALLEL_UPLOAD` is unset.
 * Composition root calls this once per run based on the selected remote
 * writer. Tests reset by passing 1.
 *
 * Values below 1 (and NaN) are stored as 1. Fractional values are rounded
 * down so consumers always receive a whole worker count.
 *
 * @param n Desired default concurrency.
 */
export function setDefaultUploadConcurrency(n) {
    // `NaN >= 1` is false, so NaN takes the serial fallback as well.
    moduleDefault = n >= 1 ? Math.floor(n) : 1;
}
/** Exposed for tests — returns the current module default. */
export function getDefaultUploadConcurrency() {
    return moduleDefault;
}
/**
 * Resolve the configured concurrency from `AILF_PARALLEL_UPLOAD`.
 *
 * - unset / empty / not a plain non-negative integer → the per-writer module
 *   default (see `setDefaultUploadConcurrency`).
 * - 0 → 1 (forced serial).
 * - 1 → `DEFAULT_CONCURRENCY` (8).
 * - N > 1 → N.
 *
 * Surrounding whitespace is ignored, so `" 0 "` still forces serial. Values
 * with trailing garbage or a fractional part (`"8abc"`, `"2.5"`) are treated
 * as invalid instead of being silently truncated.
 *
 * @returns The number of uploads that may run concurrently (always >= 1).
 */
export function resolveUploadConcurrency() {
    const raw = (process.env.AILF_PARALLEL_UPLOAD ?? "").trim();
    // Strict digits-only check: `parseInt` would accept "8abc" as 8 and "2.5" as 2.
    if (!/^\+?\d+$/.test(raw))
        return moduleDefault;
    const parsed = Number(raw);
    // Absurdly long digit strings lose integer precision — treat as invalid.
    if (!Number.isSafeInteger(parsed))
        return moduleDefault;
    if (parsed === 0)
        return 1;
    if (parsed === 1)
        return DEFAULT_CONCURRENCY;
    return parsed;
}
|
|
53
|
+
/**
 * Run `fn` against every item with at most `concurrency` active at once.
 * Preserves input order in the result array. Rejections propagate — callers
 * with non-blocking semantics should catch inside `fn`.
 *
 * When `concurrency <= 1` (or is not a usable number), runs strictly serially
 * (drop-in equivalent of a `for … await` loop).
 *
 * In the parallel path the returned promise rejects with the first failure;
 * calls already in flight are left to settle, but no further items are
 * started, mirroring how the serial loop stops at the first rejection.
 *
 * @param items Inputs to process; the result at index `i` belongs to `items[i]`.
 * @param concurrency Maximum number of `fn` calls in flight at once.
 * @param fn Async mapper invoked as `fn(item, index)`.
 * @returns Results in input order.
 */
export async function parallelMap(items, concurrency, fn) {
    if (items.length === 0)
        return [];
    // Whole worker count, never more than there are items. NaN/undefined
    // collapse to NaN here and fall through to the serial path below instead
    // of silently spawning zero workers.
    const width = Math.min(items.length, Math.floor(Number(concurrency)));
    if (!(width > 1)) {
        const out = [];
        for (let i = 0; i < items.length; i++) {
            out.push(await fn(items[i], i));
        }
        return out;
    }
    const results = new Array(items.length);
    let cursor = 0;
    let failed = false;
    async function worker() {
        // Each worker claims the next unprocessed index until the input is
        // exhausted or some worker has failed.
        while (!failed) {
            const i = cursor++;
            if (i >= items.length)
                return;
            try {
                results[i] = await fn(items[i], i);
            }
            catch (err) {
                failed = true;
                throw err;
            }
        }
    }
    await Promise.all(Array.from({ length: width }, () => worker()));
    return results;
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* UploadMetrics — spike instrumentation for W0056 (faster artifact upload).
|
|
3
|
+
*
|
|
4
|
+
* Captures per-operation timing on the artifact-upload path so the spike has
|
|
5
|
+
* a measured baseline: artifact count, total bytes, wall-clock, and the
|
|
6
|
+
* sign-RTT vs. PUT split. Gated on `AILF_UPLOAD_METRICS=1` in the composition
|
|
7
|
+
* root — a no-op when off.
|
|
8
|
+
*
|
|
9
|
+
* Design:
|
|
10
|
+
* - `UploadMetricsSink` is the narrow interface that writers depend on.
|
|
11
|
+
* - `UploadMetrics` is the in-process implementation that buffers events
|
|
12
|
+
* and emits both a stderr summary table and an NDJSON detail file.
|
|
13
|
+
* - `summarize()` is called by `InstrumentedArtifactWriter` once, after
|
|
14
|
+
* `writeManifest` succeeds (the natural end-of-run signal).
|
|
15
|
+
*
|
|
16
|
+
* This file is a spike deliverable — the API is intentionally ad hoc and
|
|
17
|
+
* may be promoted to `packages/core/src/ports/` if we ship anything.
|
|
18
|
+
*/
|
|
19
|
+
import type { Logger } from "../_vendor/ailf-core/index.d.ts";
|
|
20
|
+
/** Phase of the upload path that a metric event is attributed to. */
export type UploadPhase = "sign" | "put" | "compose" | "emit" | "ndjson-part" | "manifest";
|
|
21
|
+
/** One timing record for a single phase; `UploadMetrics.record` stamps `ts`, callers supply the rest. */
export interface UploadMetricEvent {
|
|
22
|
+
/** ISO timestamp the event was recorded. */
|
|
23
|
+
ts: string;
|
|
24
|
+
/** Phase being measured — writers record `sign`/`put`/`compose` at the call site; the decorator records `emit`/`ndjson-part`/`manifest` end-to-end. */
|
|
25
|
+
phase: UploadPhase;
|
|
26
|
+
/** Writer class that produced the event (e.g. "ApiGatewayArtifactWriter"). */
|
|
27
|
+
writer: string;
|
|
28
|
+
/** Artifact type (or `"manifest"`). */
|
|
29
|
+
type: string;
|
|
30
|
+
/** Wall-clock for the phase, in milliseconds. */
|
|
31
|
+
ms: number;
|
|
32
|
+
/** Body size in bytes, when applicable. */
|
|
33
|
+
bytes?: number;
|
|
34
|
+
/** True when the underlying call resolved without throwing / without a non-2xx response. */
|
|
35
|
+
success: boolean;
|
|
36
|
+
}
|
|
37
|
+
/** Narrow interface writers depend on to report upload timing events. */
export interface UploadMetricsSink {
|
|
38
|
+
/** Report one event; the caller does not supply `ts`. */
record(event: Omit<UploadMetricEvent, "ts">): void;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* No-op sink — writers default to this when metrics are off, so the
|
|
42
|
+
* instrumentation call sites remain uniform whether or not the collector is
|
|
43
|
+
* active. Its `record` discards the event.
|
|
44
|
+
*/
|
|
45
|
+
export declare const NO_OP_UPLOAD_METRICS: UploadMetricsSink;
|
|
46
|
+
/** Construction options for `UploadMetrics`. */
export interface UploadMetricsOptions {
|
|
47
|
+
/** Logger used for the summary table (`info`) and for detail-file write failures (`warn`). */
|
|
48
|
+
logger: Logger;
|
|
49
|
+
/** Absolute path where the NDJSON detail file is written; parent directories are created as needed. Skipped when undefined. */
|
|
50
|
+
detailFile?: string;
|
|
51
|
+
}
|
|
52
|
+
/** In-process collector that buffers upload events and reports them once via `summarize()`. */
export declare class UploadMetrics implements UploadMetricsSink {
|
|
53
|
+
private readonly options;
|
|
54
|
+
private readonly events;
|
|
55
|
+
private summarized;
|
|
56
|
+
constructor(options: UploadMetricsOptions);
|
|
57
|
+
/** Buffer one event, stamping it with the current time as an ISO string. */
record(event: Omit<UploadMetricEvent, "ts">): void;
|
|
58
|
+
/** Log the summary table and, when `detailFile` is set, write the NDJSON detail file. Only the first call does any work; a failed file write is logged as a warning rather than thrown. */
summarize(): Promise<void>;
|
|
59
|
+
/** Exposed for tests — returns a copy. */
|
|
60
|
+
snapshot(): readonly UploadMetricEvent[];
|
|
61
|
+
}
|
|
62
|
+
/** Render events as a text table with one row per (phase, writer) pair, ordered by total milliseconds descending. */
export declare function buildSummaryTable(events: readonly UploadMetricEvent[]): string;
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* UploadMetrics — spike instrumentation for W0056 (faster artifact upload).
|
|
3
|
+
*
|
|
4
|
+
* Captures per-operation timing on the artifact-upload path so the spike has
|
|
5
|
+
* a measured baseline: artifact count, total bytes, wall-clock, and the
|
|
6
|
+
* sign-RTT vs. PUT split. Gated on `AILF_UPLOAD_METRICS=1` in the composition
|
|
7
|
+
* root — a no-op when off.
|
|
8
|
+
*
|
|
9
|
+
* Design:
|
|
10
|
+
* - `UploadMetricsSink` is the narrow interface that writers depend on.
|
|
11
|
+
* - `UploadMetrics` is the in-process implementation that buffers events
|
|
12
|
+
* and emits both a stderr summary table and an NDJSON detail file.
|
|
13
|
+
* - `summarize()` is called by `InstrumentedArtifactWriter` once, after
|
|
14
|
+
* `writeManifest` succeeds (the natural end-of-run signal).
|
|
15
|
+
*
|
|
16
|
+
* This file is a spike deliverable — the API is intentionally ad hoc and
|
|
17
|
+
* may be promoted to `packages/core/src/ports/` if we ship anything.
|
|
18
|
+
*/
|
|
19
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
20
|
+
import { dirname } from "node:path";
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Implementation
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
/**
 * Sink that ignores everything it is given. Writers fall back to it when
 * metrics collection is off, which lets every instrumentation call site call
 * `record` unconditionally.
 */
export const NO_OP_UPLOAD_METRICS = {
    record: function record() {
        // Intentionally empty: events are discarded.
    },
};
|
|
32
|
+
/**
 * In-process metrics collector: buffers every recorded event in memory and,
 * on the first `summarize()` call, logs a summary table and optionally writes
 * the raw events to an NDJSON file.
 */
export class UploadMetrics {
    options;
    events = [];
    summarized = false;
    constructor(options) {
        this.options = options;
    }
    /** Buffer one event, stamping it with the current time as an ISO string. */
    record(event) {
        const stamped = { ...event, ts: new Date().toISOString() };
        this.events.push(stamped);
    }
    /**
     * Emit the summary once. Later calls return immediately. A failure while
     * writing the detail file is reported through `logger.warn` and never
     * thrown to the caller.
     */
    async summarize() {
        if (this.summarized) {
            return;
        }
        this.summarized = true;
        const { logger, detailFile } = this.options;
        const total = this.events.length;
        if (total === 0) {
            logger.info("[upload-metrics] no events recorded");
            return;
        }
        const table = buildSummaryTable(this.events);
        logger.info(`[upload-metrics] ${total} events recorded\n${table}`);
        if (!detailFile) {
            return;
        }
        try {
            await mkdir(dirname(detailFile), { recursive: true });
            // One JSON document per line, with a trailing newline.
            const lines = this.events.map((entry) => JSON.stringify(entry));
            await writeFile(detailFile, `${lines.join("\n")}\n`, "utf-8");
            logger.info(`[upload-metrics] detail written to ${detailFile}`);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            logger.warn(`[upload-metrics] failed to write detail file "${detailFile}": ${reason}`);
        }
    }
    /** Exposed for tests — returns a copy. */
    snapshot() {
        return this.events.slice();
    }
}
|
|
71
|
+
/**
 * Render upload events as a fixed-width text table.
 *
 * Events are grouped by (phase, writer). Each group becomes one row carrying
 * its event count, failure count, summed bytes, summed milliseconds, and the
 * p50 / p95 / max durations. Rows are ordered by total milliseconds,
 * largest first.
 *
 * The header, separator and rows are all generated from one set of column
 * widths so the `|` / `+` dividers always line up.
 *
 * @param events Events to summarise; an empty list yields header and separator only.
 * @returns The table as a newline-joined string (no trailing newline after the last row).
 */
export function buildSummaryTable(events) {
    // Titles and widths of every padded column; the final "max" column is unpadded.
    const titles = ["phase", "n", "fail", "bytes", "total ms", "p50", "p95"];
    const widths = [47, 4, 5, 12, 9, 4, 4];
    const renderLine = (cells, tail) => `${cells.map((cell, i) => pad(cell, widths[i])).join("| ")}| ${tail}`;
    const byKey = new Map();
    for (const ev of events) {
        const key = `${ev.phase}\t${ev.writer}`;
        let bucket = byKey.get(key);
        if (!bucket) {
            bucket = [];
            byKey.set(key, bucket);
        }
        bucket.push(ev);
    }
    const rows = [];
    for (const [key, bucket] of byKey) {
        const phase = key.split("\t").join(" · ");
        // Sorted ascending so percentile() and the max lookup can index directly.
        const durations = bucket.map((e) => e.ms).sort((a, b) => a - b);
        const totalMs = durations.reduce((sum, ms) => sum + ms, 0);
        const totalBytes = bucket.reduce((sum, e) => sum + (e.bytes ?? 0), 0);
        const failures = bucket.filter((e) => !e.success).length;
        rows.push({
            phase,
            count: bucket.length,
            failures,
            totalMs,
            totalBytes,
            p50: percentile(durations, 0.5),
            p95: percentile(durations, 0.95),
            max: durations[durations.length - 1] ?? 0,
        });
    }
    rows.sort((a, b) => b.totalMs - a.totalMs);
    const header = renderLine(titles, "max");
    // Every column after the first is preceded by a space in the rows, hence width + 1.
    const sep = `${widths.map((w, i) => "-".repeat(i === 0 ? w : w + 1)).join("+")}+-----`;
    const body = rows
        .map((r) => renderLine([
        r.phase,
        String(r.count),
        String(r.failures),
        formatBytes(r.totalBytes),
        String(Math.round(r.totalMs)),
        String(Math.round(r.p50)),
        String(Math.round(r.p95)),
    ], String(Math.round(r.max))))
        .join("\n");
    return `${header}\n${sep}\n${body}`;
}
/**
 * Nearest-rank percentile of an ascending-sorted array.
 * Returns 0 for an empty array.
 */
function percentile(sorted, p) {
    if (sorted.length === 0)
        return 0;
    const idx = Math.min(sorted.length - 1, Math.max(0, Math.ceil(sorted.length * p) - 1));
    return sorted[idx] ?? 0;
}
/**
 * Right-pad `s` with spaces to `width`. A value that already fills or
 * overflows the column still gets one trailing space so it never touches the
 * next divider.
 */
function pad(s, width) {
    return s.length >= width ? `${s} ` : s + " ".repeat(width - s.length);
}
/** Format a byte count using binary units (B, KB, MB, GB). */
function formatBytes(n) {
    if (n < 1024)
        return `${n} B`;
    if (n < 1024 * 1024)
        return `${(n / 1024).toFixed(1)} KB`;
    if (n < 1024 * 1024 * 1024)
        return `${(n / 1024 / 1024).toFixed(1)} MB`;
    return `${(n / 1024 / 1024 / 1024).toFixed(2)} GB`;
}
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
* @see packages/core/src/ports/context.ts — AppContext interface
|
|
16
16
|
* @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
|
|
17
17
|
*/
|
|
18
|
-
import { type AppContext, type ArtifactWriter, type AssertionRegistration, type Logger, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
|
|
18
|
+
import { type AppContext, type ArtifactWriter, type ArtifactWriterProgressOptions, type AssertionRegistration, type Logger, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
|
|
19
19
|
/**
|
|
20
20
|
* Create a fully wired AppContext from resolved configuration.
|
|
21
21
|
*
|
|
@@ -41,7 +41,7 @@ export declare function createAppContext(config: ResolvedConfig): AppContext;
|
|
|
41
41
|
*
|
|
42
42
|
* Exported for unit-test access; not part of the public package API.
|
|
43
43
|
*/
|
|
44
|
-
export declare function createArtifactWriter(config: ResolvedConfig, logger: Logger): ArtifactWriter;
|
|
44
|
+
export declare function createArtifactWriter(config: ResolvedConfig, logger: Logger, progress?: ArtifactWriterProgressOptions): ArtifactWriter;
|
|
45
45
|
/**
|
|
46
46
|
* Generic Promptfoo assertion types available to all evaluation modes.
|
|
47
47
|
*
|
package/dist/composition-root.js
CHANGED
|
@@ -15,17 +15,22 @@
|
|
|
15
15
|
* @see packages/core/src/ports/context.ts — AppContext interface
|
|
16
16
|
* @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
|
|
17
17
|
*/
|
|
18
|
-
import { InMemoryPluginRegistry, NoOpArtifactWriter, generateRunId, isArtifactType, } from "./_vendor/ailf-core/index.js";
|
|
18
|
+
import { ARTIFACT_EXPORT_PHASE_ID, InMemoryPluginRegistry, NoOpArtifactWriter, NoOpProgressReporter, generateRunId, isArtifactType, } from "./_vendor/ailf-core/index.js";
|
|
19
19
|
import { AccumulatingArtifactWriter } from "./artifact-capture/accumulating-artifact-writer.js";
|
|
20
20
|
import { ApiGatewayArtifactWriter } from "./artifact-capture/api-gateway-artifact-writer.js";
|
|
21
|
+
import { BatchingApiGatewayArtifactWriter } from "./artifact-capture/batching-api-gateway-artifact-writer.js";
|
|
21
22
|
import { FanoutArtifactWriter } from "./artifact-capture/fanout-artifact-writer.js";
|
|
22
23
|
import { GcsArtifactWriter } from "./artifact-capture/gcs-artifact-writer.js";
|
|
24
|
+
import { InstrumentedArtifactWriter } from "./artifact-capture/instrumented-artifact-writer.js";
|
|
23
25
|
import { LocalFilesystemArtifactWriter } from "./artifact-capture/local-fs-artifact-writer.js";
|
|
26
|
+
import { resolveUploadConcurrency, setDefaultUploadConcurrency, } from "./artifact-capture/parallel-emit.js";
|
|
27
|
+
import { UploadMetrics } from "./artifact-capture/upload-metrics.js";
|
|
24
28
|
import { ContentLakeCacheAdapter } from "./adapters/cache/content-lake-cache.js";
|
|
25
29
|
import { loadExternalPresets } from "./pipeline/compiler/preset-loader.js";
|
|
26
30
|
import { FilesystemCache } from "./adapters/cache/filesystem-cache.js";
|
|
27
31
|
import { PromptfooEvalAdapter } from "./adapters/eval-runners/promptfoo-eval-adapter.js";
|
|
28
32
|
import { ConsoleLogger, JsonLogger, QuietLogger, } from "./adapters/loggers/index.js";
|
|
33
|
+
import { ConsoleProgressReporter } from "./adapters/progress/console-progress-reporter.js";
|
|
29
34
|
import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource, } from "./adapters/task-sources/index.js";
|
|
30
35
|
import { createAgentHarnessBase, createKnowledgeProbeBase, createLiteracyModeBase, createMcpServerModeBase, } from "./pipeline/compiler/mode-bases/index.js";
|
|
31
36
|
import { createSanityLiteracyPreset } from "./pipeline/compiler/presets/index.js";
|
|
@@ -41,6 +46,9 @@ import { loadSinks } from "./sinks/index.js";
|
|
|
41
46
|
export function createAppContext(config) {
|
|
42
47
|
// Logger — selected by env var preferences
|
|
43
48
|
const logger = createLogger();
|
|
49
|
+
// Progress reporter — console-backed for the default logger; no-op for
|
|
50
|
+
// JSON/quiet modes and tests where interactive output is inappropriate.
|
|
51
|
+
const progress = createProgressReporter();
|
|
44
52
|
// Cache — filesystem, optionally decorated with Content Lake fallback
|
|
45
53
|
const cache = config.noCache ? undefined : createCache(config);
|
|
46
54
|
// Task source — selected by config.taskSourceType
|
|
@@ -64,7 +72,12 @@ export function createAppContext(config) {
|
|
|
64
72
|
// `runs/{runId}/…` paths (D0032). Auto-detects the right adapter from
|
|
65
73
|
// available credentials; defaults bucket to "ailf-artifacts". Set
|
|
66
74
|
// artifactUpload: false to opt out entirely.
|
|
67
|
-
|
|
75
|
+
// W0053 — writers receive a progress reporter scoped to a single
|
|
76
|
+
// `artifact-export` phase so the CLI can render per-batch updates.
|
|
77
|
+
const artifactWriter = createArtifactWriter(config, logger, {
|
|
78
|
+
reporter: progress,
|
|
79
|
+
phaseId: ARTIFACT_EXPORT_PHASE_ID,
|
|
80
|
+
});
|
|
68
81
|
// Generate the pipeline's RunId once; every downstream step reads it
|
|
69
82
|
// from the context (D0032).
|
|
70
83
|
const runId = generateRunId();
|
|
@@ -76,6 +89,7 @@ export function createAppContext(config) {
|
|
|
76
89
|
docFetcher,
|
|
77
90
|
evalRunner,
|
|
78
91
|
logger,
|
|
92
|
+
progress,
|
|
79
93
|
registry,
|
|
80
94
|
reportStore,
|
|
81
95
|
runId,
|
|
@@ -98,6 +112,23 @@ function createLogger() {
|
|
|
98
112
|
process.env.AILF_VERBOSE === "1",
|
|
99
113
|
});
|
|
100
114
|
}
|
|
115
|
+
/**
 * Pick the ProgressReporter adapter from the logging-related environment
 * variables. JSON output (`AILF_LOG_FORMAT=json`) and quiet mode
 * (`AILF_LOG_LEVEL=quiet` or `AILF_QUIET=1`) get a no-op reporter; everything
 * else gets the console reporter, with `verbose` enabled when
 * `AILF_LOG_LEVEL=verbose` or `AILF_VERBOSE=1`.
 */
function createProgressReporter() {
    const { AILF_LOG_FORMAT, AILF_LOG_LEVEL, AILF_QUIET, AILF_VERBOSE } = process.env;
    const silenced = AILF_LOG_FORMAT === "json" ||
        AILF_LOG_LEVEL === "quiet" ||
        AILF_QUIET === "1";
    if (silenced) {
        return new NoOpProgressReporter();
    }
    const verbose = AILF_LOG_LEVEL === "verbose" || AILF_VERBOSE === "1";
    return new ConsoleProgressReporter({ verbose });
}
|
|
101
132
|
/**
|
|
102
133
|
* Shared GCS bucket for report artifacts. Matches the gateway default at
|
|
103
134
|
* packages/api/src/routes/artifacts.ts — both sides assume ailf-artifacts
|
|
@@ -129,7 +160,7 @@ const DEFAULT_LOCAL_ARTIFACTS_DIR = ".ailf/results/captures";
|
|
|
129
160
|
*
|
|
130
161
|
* Exported for unit-test access; not part of the public package API.
|
|
131
162
|
*/
|
|
132
|
-
export function createArtifactWriter(config, logger) {
|
|
163
|
+
export function createArtifactWriter(config, logger, progress) {
|
|
133
164
|
// Legacy `artifactUpload: false` still disables — treat as an alias for
|
|
134
165
|
// the canonical `artifactsDisabled: true` until W0052 removes it.
|
|
135
166
|
if (config.artifactsDisabled === true || config.artifactUpload === false) {
|
|
@@ -138,10 +169,27 @@ export function createArtifactWriter(config, logger) {
|
|
|
138
169
|
}
|
|
139
170
|
const exclude = resolveExcludeList(config.artifactsExclude, logger);
|
|
140
171
|
const rootDir = config.artifactsDir ?? DEFAULT_LOCAL_ARTIFACTS_DIR;
|
|
141
|
-
|
|
142
|
-
|
|
172
|
+
// W0056 — opt-in measurement of the upload path. The collector is passed
|
|
173
|
+
// to the remote writer (where sign/PUT/compose phases live) AND wraps the
|
|
174
|
+
// final writer to record caller-observed `emit`/`writeManifest` totals.
|
|
175
|
+
// `summarize()` fires from the decorator's `writeManifest` hook.
|
|
176
|
+
const metrics = process.env.AILF_UPLOAD_METRICS === "1"
|
|
177
|
+
? new UploadMetrics({
|
|
178
|
+
logger,
|
|
179
|
+
detailFile: `${rootDir}/upload-metrics/run-${Date.now()}.ndjson`,
|
|
180
|
+
})
|
|
181
|
+
: null;
|
|
182
|
+
// W0053: progress attaches to the OUTERMOST of (local-only | fanout). When
|
|
183
|
+
// fanout is wired, the delegates stay silent so we don't double-count the
|
|
184
|
+
// same caller-visible write across two backends.
|
|
185
|
+
const remote = createRemoteArtifactWriter(config, logger, metrics);
|
|
186
|
+
const local = new LocalFilesystemArtifactWriter({
|
|
187
|
+
rootDir,
|
|
188
|
+
exclude,
|
|
189
|
+
...(remote ? {} : { progress }),
|
|
190
|
+
});
|
|
143
191
|
const base = remote
|
|
144
|
-
? new FanoutArtifactWriter([local, remote])
|
|
192
|
+
? new FanoutArtifactWriter([local, remote], { progress })
|
|
145
193
|
: local;
|
|
146
194
|
if (!remote) {
|
|
147
195
|
logger.debug(`Artifact writer: LocalFilesystemArtifactWriter only (rootDir=${rootDir})`);
|
|
@@ -153,7 +201,10 @@ export function createArtifactWriter(config, logger) {
|
|
|
153
201
|
// RunManifest without each producer bookkeeping its own ArtifactRefs
|
|
154
202
|
// (W0051 Slice 3 revisit — Option B of the "manifest empty on real runs"
|
|
155
203
|
// fix).
|
|
156
|
-
|
|
204
|
+
const accumulating = new AccumulatingArtifactWriter(base);
|
|
205
|
+
return metrics
|
|
206
|
+
? new InstrumentedArtifactWriter(accumulating, metrics)
|
|
207
|
+
: accumulating;
|
|
157
208
|
}
|
|
158
209
|
/**
|
|
159
210
|
* Validate the exclude list against the registry. Unknown types are dropped
|
|
@@ -179,19 +230,54 @@ function resolveExcludeList(raw, logger) {
|
|
|
179
230
|
* the sole backend for that run, which is the D0033 M4 default for laptops
|
|
180
231
|
* and CI without GCS creds.
|
|
181
232
|
*/
|
|
182
|
-
function createRemoteArtifactWriter(config, logger) {
|
|
233
|
+
function createRemoteArtifactWriter(config, logger, metrics) {
|
|
183
234
|
const bucket = config.artifactGcsBucket ?? DEFAULT_ARTIFACT_BUCKET;
|
|
184
235
|
const hasGcsCredentials = Boolean(process.env.GOOGLE_APPLICATION_CREDENTIALS || process.env.GCLOUD_PROJECT);
|
|
185
236
|
if (hasGcsCredentials) {
|
|
186
|
-
|
|
187
|
-
|
|
237
|
+
// W0056 Phase 1: the GCS-direct path measured 0 failures at
|
|
238
|
+
// concurrency 8 with a 60 % pipeline-time reduction. Flip parallelism
|
|
239
|
+
// on by default on this path. `AILF_PARALLEL_UPLOAD=0` still forces
|
|
240
|
+
// serial as an escape hatch.
|
|
241
|
+
setDefaultUploadConcurrency(8);
|
|
242
|
+
logger.debug(`Artifact remote backend: GcsArtifactWriter (ADC, bucket=${bucket}, defaultConcurrency=8)`);
|
|
243
|
+
return new GcsArtifactWriter({
|
|
244
|
+
bucket,
|
|
245
|
+
...(metrics ? { metrics } : {}),
|
|
246
|
+
});
|
|
188
247
|
}
|
|
189
248
|
if (config.apiKey && config.apiUrl) {
|
|
190
|
-
|
|
249
|
+
// W0058 Phase 2: batching writer is the default on the API Gateway path.
|
|
250
|
+
// Prototype B (W0056) showed batch signing + client-side parallelism
|
|
251
|
+
// eliminates the 429 storm that single-URL parallelism triggered on the
|
|
252
|
+
// Vercel signing endpoint, at parity with the GCS-direct parallel path
|
|
253
|
+
// once the sign+PUT overlap optimization lands. Flip the default to 8
|
|
254
|
+
// concurrency; `AILF_PARALLEL_UPLOAD=0` forces serial as a rollback
|
|
255
|
+
// escape hatch and auto-selects the legacy single-URL writer.
|
|
256
|
+
setDefaultUploadConcurrency(8);
|
|
257
|
+
const concurrency = resolveUploadConcurrency();
|
|
258
|
+
if (concurrency > 1) {
|
|
259
|
+
logger.debug(`Artifact remote backend: BatchingApiGatewayArtifactWriter (via ${config.apiUrl}, bucket=${bucket}, putConcurrency=${concurrency})`);
|
|
260
|
+
// D0034: neither API Gateway writer supports NDJSON `appendNdjson`.
|
|
261
|
+
// Traces that flow through `appendNdjson` are dropped on this path.
|
|
262
|
+
// Surface the gap once at startup instead of ambushing users with a
|
|
263
|
+
// silent null ref at emit time.
|
|
264
|
+
logger.warn("Artifacts: API Gateway path selected without GCS ADC — " +
|
|
265
|
+
"trace (NDJSON) artifacts will be skipped (D0034). Set " +
|
|
266
|
+
"GOOGLE_APPLICATION_CREDENTIALS or GCLOUD_PROJECT to capture traces.");
|
|
267
|
+
return new BatchingApiGatewayArtifactWriter({
|
|
268
|
+
apiBaseUrl: config.apiUrl,
|
|
269
|
+
apiKey: config.apiKey,
|
|
270
|
+
bucket,
|
|
271
|
+
putConcurrency: concurrency,
|
|
272
|
+
...(metrics ? { metrics } : {}),
|
|
273
|
+
});
|
|
274
|
+
}
|
|
275
|
+
logger.debug(`Artifact remote backend: ApiGatewayArtifactWriter (via ${config.apiUrl}, bucket=${bucket}, serial — AILF_PARALLEL_UPLOAD=0 override)`);
|
|
191
276
|
return new ApiGatewayArtifactWriter({
|
|
192
277
|
apiBaseUrl: config.apiUrl,
|
|
193
278
|
apiKey: config.apiKey,
|
|
194
279
|
bucket,
|
|
280
|
+
...(metrics ? { metrics } : {}),
|
|
195
281
|
});
|
|
196
282
|
}
|
|
197
283
|
return null;
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
* each step completes. This enables the GET /v1/jobs/:jobId polling
|
|
12
12
|
* endpoint to show real-time progress.
|
|
13
13
|
*/
|
|
14
|
-
import { assoc, } from "../_vendor/ailf-core/index.js";
|
|
14
|
+
import { ARTIFACT_EXPORT_PHASE_ID, assoc, } from "../_vendor/ailf-core/index.js";
|
|
15
15
|
import { runStep } from "./step-runner.js";
|
|
16
16
|
// ---------------------------------------------------------------------------
|
|
17
17
|
// Job progress reporter
|
|
@@ -142,10 +142,16 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
142
142
|
if (hasJob) {
|
|
143
143
|
await reportJobProgress(ctx, steps[0]?.name ?? "init", 0, steps.length, "running", undefined, jobUpdates);
|
|
144
144
|
}
|
|
145
|
+
// W0053 — artifact export phase. Opens the first time a non-`run-eval`
|
|
146
|
+
// step starts, signalling the user that promptfoo's progress bar is done
|
|
147
|
+
// and the (previously silent) GCS export/upload window is now active.
|
|
148
|
+
// Closed in a finally after the step loop, regardless of pipeline outcome.
|
|
149
|
+
const exportPhase = createExportPhaseGate(ctx);
|
|
145
150
|
for (let i = 0; i < steps.length; i++) {
|
|
146
151
|
const step = steps[i];
|
|
147
152
|
ctx.logger.debug(`Starting step ${i + 1}/${steps.length}: ${step.name}`);
|
|
148
153
|
ctx.logger.section(step.name);
|
|
154
|
+
exportPhase.maybeOpen(step.name);
|
|
149
155
|
// Report current step progress
|
|
150
156
|
if (hasJob) {
|
|
151
157
|
await reportJobProgress(ctx, step.name, i, steps.length, "running", undefined, jobUpdates);
|
|
@@ -171,6 +177,7 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
171
177
|
// observability-only capture not tied to a registered artifact type;
|
|
172
178
|
// dropped in W0050. Use the JobStore path for job telemetry.
|
|
173
179
|
await capturePipelineContext(ctx, state, results);
|
|
180
|
+
exportPhase.close();
|
|
174
181
|
return {
|
|
175
182
|
belowCritical: state.belowCritical,
|
|
176
183
|
durationMs: Date.now() - pipelineStart,
|
|
@@ -227,6 +234,7 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
227
234
|
// Capture pipeline context. `job-updates` observability captures were
|
|
228
235
|
// dropped in Slice 6.1 — JobStore is the supported telemetry path.
|
|
229
236
|
await capturePipelineContext(ctx, state, results);
|
|
237
|
+
exportPhase.close();
|
|
230
238
|
return {
|
|
231
239
|
belowCritical: state.belowCritical,
|
|
232
240
|
durationMs,
|
|
@@ -237,3 +245,91 @@ export async function orchestratePipeline(ctx, steps) {
|
|
|
237
245
|
validation,
|
|
238
246
|
};
|
|
239
247
|
}
|
|
248
|
+
// ---------------------------------------------------------------------------
|
|
249
|
+
// Artifact export phase gate (W0053)
|
|
250
|
+
// ---------------------------------------------------------------------------
|
|
251
|
+
/**
 * Build a lazy, one-shot gate around the `artifact-export` progress phase.
 *
 * The gate moves through three states — idle, open, closed — and every
 * transition fires at most once, so callers may invoke `maybeOpen` / `close`
 * as often as they like:
 *
 * - `maybeOpen(stepName)` emits `phaseStart` the first time it sees a step
 *   that is neither a `run-eval*` step nor one of the known pre-export steps.
 *   `run-eval` is skipped because promptfoo owns the progress display for
 *   that window; opening earlier would produce a duplicate progress channel.
 * - `close()` emits `phaseComplete` once, and only if the phase was opened.
 *
 * The trigger is the step name rather than a timestamp, so the phase header
 * appears exactly when the first post-eval step takes over.
 *
 * @param ctx Pipeline context; `ctx.progress` receives the phase events and
 *   `ctx` is passed to `describeExportPhase` to build the label.
 * @returns An object exposing `maybeOpen(stepName)` and `close()`.
 */
function createExportPhaseGate(ctx) {
    // Label/detail are resolved eagerly, once, when the gate is created.
    const phaseText = describeExportPhase(ctx);
    // Steps that run before the artifact-heavy post-eval section; seeing one
    // of these (or any `run-eval*` step) must not open the phase.
    const stepsBeforeExport = new Set([
        "validate",
        "mirror-repo-tasks",
        "fetch-docs",
        "generate-configs",
        "grader-consistency",
    ]);
    const holdsGateShut = (name) => name.startsWith("run-eval") || stepsBeforeExport.has(name);
    // "idle" -> "open" -> "closed"; there is no way back to an earlier state.
    let state = "idle";
    let openedAtMs = 0;
    const maybeOpen = (stepName) => {
        if (state !== "idle" || holdsGateShut(stepName)) {
            return;
        }
        // Flip the state before notifying so a throwing reporter cannot cause
        // a second phaseStart on the next step.
        state = "open";
        openedAtMs = Date.now();
        ctx.progress.phaseStart({
            phaseId: ARTIFACT_EXPORT_PHASE_ID,
            label: phaseText.label,
            detail: phaseText.detail,
            startedAt: openedAtMs,
        });
    };
    const close = () => {
        if (state !== "open") {
            return;
        }
        state = "closed";
        // The gate does not track running totals, so zero counts are sent
        // here; per the original note, reporter adapters that render a final
        // summary accumulate their own counts from phaseProgress events.
        ctx.progress.phaseComplete({
            phaseId: ARTIFACT_EXPORT_PHASE_ID,
            itemsCompleted: 0,
            bytesCompleted: 0,
            durationMs: Date.now() - openedAtMs,
        });
    };
    return { maybeOpen, close };
}
|
|
312
|
+
/**
 * Build the user-facing label for the artifact-export phase by peeking at the
 * wired writer chain.
 *
 * The destination is taken from the constructor name of
 * `ctx.artifactWriter.inner` when the writer exposes an `inner`, otherwise
 * from the writer itself. Naming the destination in the label keeps every
 * progress line self-describing.
 *
 * Both the serial `ApiGatewayArtifactWriter` and the parallel
 * `BatchingApiGatewayArtifactWriter` are reported as "API gateway". A missing
 * writer, or one without a resolvable constructor name, never throws: a
 * missing writer is treated like the no-op writer, and an unrecognised name
 * falls back to "local".
 *
 * NOTE(review): a fan-out writer is always described as "local + GCS", even
 * though the remote leg may be an API-gateway writer — confirm whether the
 * fan-out's children should be inspected instead.
 *
 * @param ctx Pipeline context; only `ctx.artifactWriter` is read.
 * @returns `{ label, detail }`, or `{ label }` alone when nothing is exported.
 */
function describeExportPhase(ctx) {
    const exportLabel = "Exporting run artifacts";
    const writer = ctx.artifactWriter;
    if (writer == null) {
        // No writer wired: nothing will be exported, same as the no-op writer.
        return { label: "Finalizing run" };
    }
    // Prefer the wrapped writer's class name; fall back to the outer writer.
    const writerName = writer.inner?.constructor?.name ?? writer.constructor?.name;
    switch (writerName) {
        case "NoOpArtifactWriter":
            return { label: "Finalizing run" };
        case "FanoutArtifactWriter":
            return { label: exportLabel, detail: "local + GCS" };
        case "GcsArtifactWriter":
            return { label: exportLabel, detail: "GCS" };
        case "ApiGatewayArtifactWriter":
        case "BatchingApiGatewayArtifactWriter":
            return { label: exportLabel, detail: "API gateway" };
        default:
            return { label: exportLabel, detail: "local" };
    }
}
|