@sanity/ailf 2.9.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/dist/_vendor/ailf-core/artifact-capture/association.d.ts +37 -0
  2. package/dist/_vendor/ailf-core/artifact-capture/association.js +19 -0
  3. package/dist/_vendor/ailf-core/artifact-registry.d.ts +1 -1
  4. package/dist/_vendor/ailf-core/artifact-registry.js +1 -18
  5. package/dist/_vendor/ailf-core/batch-signing.d.ts +64 -0
  6. package/dist/_vendor/ailf-core/batch-signing.js +23 -0
  7. package/dist/_vendor/ailf-core/index.d.ts +2 -2
  8. package/dist/_vendor/ailf-core/index.js +2 -2
  9. package/dist/_vendor/ailf-core/ports/context.d.ts +12 -20
  10. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -2
  11. package/dist/_vendor/ailf-core/ports/index.js +1 -0
  12. package/dist/_vendor/ailf-core/ports/progress-reporter.d.ts +74 -0
  13. package/dist/_vendor/ailf-core/ports/progress-reporter.js +26 -0
  14. package/dist/_vendor/ailf-core/services/slim-report-summary.js +1 -16
  15. package/dist/adapters/config-sources/file-config-adapter.js +0 -4
  16. package/dist/adapters/progress/console-progress-reporter.d.ts +35 -0
  17. package/dist/adapters/progress/console-progress-reporter.js +110 -0
  18. package/dist/artifact-capture/api-gateway-artifact-writer.d.ts +8 -1
  19. package/dist/artifact-capture/api-gateway-artifact-writer.js +79 -42
  20. package/dist/artifact-capture/batching-api-gateway-artifact-writer.d.ts +108 -0
  21. package/dist/artifact-capture/batching-api-gateway-artifact-writer.js +492 -0
  22. package/dist/artifact-capture/fanout-artifact-writer.d.ts +14 -2
  23. package/dist/artifact-capture/fanout-artifact-writer.js +25 -4
  24. package/dist/artifact-capture/gcs-artifact-writer.d.ts +27 -1
  25. package/dist/artifact-capture/gcs-artifact-writer.js +168 -38
  26. package/dist/artifact-capture/instrumented-artifact-writer.d.ts +32 -0
  27. package/dist/artifact-capture/instrumented-artifact-writer.js +151 -0
  28. package/dist/artifact-capture/local-fs-artifact-writer.d.ts +8 -1
  29. package/dist/artifact-capture/local-fs-artifact-writer.js +23 -4
  30. package/dist/artifact-capture/parallel-emit.d.ts +43 -0
  31. package/dist/artifact-capture/parallel-emit.js +84 -0
  32. package/dist/artifact-capture/redact-artifact.d.ts +3 -5
  33. package/dist/artifact-capture/redact-artifact.js +3 -5
  34. package/dist/artifact-capture/upload-metrics.d.ts +62 -0
  35. package/dist/artifact-capture/upload-metrics.js +125 -0
  36. package/dist/cli.js +56 -2
  37. package/dist/commands/explain-handler.js +1 -5
  38. package/dist/commands/pipeline-action.d.ts +0 -4
  39. package/dist/commands/pipeline-action.js +11 -45
  40. package/dist/commands/pipeline.d.ts +1 -5
  41. package/dist/commands/pipeline.js +1 -5
  42. package/dist/commands/runs.d.ts +18 -0
  43. package/dist/commands/runs.js +71 -0
  44. package/dist/composition-root.d.ts +2 -2
  45. package/dist/composition-root.js +98 -38
  46. package/dist/orchestration/build-app-context.js +4 -7
  47. package/dist/orchestration/pipeline-orchestrator.js +100 -24
  48. package/dist/orchestration/steps/calculate-scores-step.js +1 -1
  49. package/dist/orchestration/steps/finalize-run-step.js +33 -2
  50. package/dist/pipeline/emit-eval-results.js +29 -11
  51. package/dist/pipeline/map-request-to-config.js +0 -4
  52. package/dist/pipeline/upload-test-outputs.d.ts +12 -5
  53. package/dist/pipeline/upload-test-outputs.js +27 -10
  54. package/package.json +3 -3
  55. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +0 -14
  56. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +0 -25
  57. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +0 -94
  58. package/dist/_vendor/ailf-core/ports/artifact-collector.js +0 -13
  59. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +0 -138
  60. package/dist/_vendor/ailf-core/ports/capture-comparator.js +0 -10
  61. package/dist/artifact-capture/comparator.d.ts +0 -22
  62. package/dist/artifact-capture/comparator.js +0 -493
  63. package/dist/artifact-capture/filesystem-collector.d.ts +0 -60
  64. package/dist/artifact-capture/filesystem-collector.js +0 -262
  65. package/dist/artifact-capture/gcs-collector.d.ts +0 -55
  66. package/dist/artifact-capture/gcs-collector.js +0 -117
  67. package/dist/commands/capture-compare.d.ts +0 -15
  68. package/dist/commands/capture-compare.js +0 -253
  69. package/dist/commands/capture-list.d.ts +0 -12
  70. package/dist/commands/capture-list.js +0 -150
  71. package/dist/commands/capture.d.ts +0 -9
  72. package/dist/commands/capture.js +0 -16
@@ -33,3 +33,40 @@ export interface AssocContext {
33
33
  * boundary actually lives.
34
34
  */
35
35
  export declare function assoc(ctx: AssocContext, partial?: Omit<AssociationValues, "run">): AssociationValues;
36
+ /**
37
+ * Literacy-mode task descriptions carry a `(gold)` / `(baseline)` suffix that
38
+ * encodes which half of the with-docs / without-docs pair the judgment belongs
39
+ * to. The D0033 artifact axes treat that variant as the `mode` axis — a single
40
+ * promptfoo run of evaluation mode "literacy" produces artifacts whose `mode`
41
+ * axis is "gold" or "baseline". All producers and all readers must agree on
42
+ * this decomposition, or signed-URL lookups 404.
43
+ *
44
+ * This helper is the single source of truth for the split. It's shared between:
45
+ * - eval's `emit-eval-results.ts` (rawResults, renderedPrompts,
46
+ * graderPrompts, graderJudgments)
47
+ * - eval's `upload-test-outputs.ts` (testOutputs)
48
+ * - core's `slim-report-summary.ts` (slim judgment / failure-mode ids)
49
+ * - studio's `JudgmentList#testOutputsKeyFor` (hover prefetch keys)
50
+ *
51
+ * Non-literacy modes (mcp-server, agent-harness, …) do not suffix their task
52
+ * descriptions; `splitTaskVariant` returns `variant: null` and the caller
53
+ * falls back to the high-level eval mode. See `resolveVariantMode`.
54
+ *
55
+ * Pure function — safe to call from browser or Node.
56
+ */
57
+ export interface TaskVariantSplit {
58
+ /** Task id with any trailing `(gold)` / `(baseline)` suffix stripped. */
59
+ readonly task: string;
60
+ /** Lowercased variant (`"gold"` | `"baseline"`), or `null` when absent. */
61
+ readonly variant: "baseline" | "gold" | null;
62
+ }
63
+ export declare function splitTaskVariant(taskId: string): TaskVariantSplit;
64
+ /**
65
+ * Resolve the `mode` axis value for an artifact association: prefer the
66
+ * per-task variant when present, fall back to the high-level evaluation mode.
67
+ * Mirrors `mode = variant ?? defaultMode` in `slim-report-summary#slimJudgments`.
68
+ */
69
+ export declare function resolveVariantMode(taskId: string, defaultMode: string): {
70
+ mode: string;
71
+ task: string;
72
+ };
@@ -26,3 +26,22 @@
26
26
  export function assoc(ctx, partial = {}) {
27
27
  return { run: ctx.runId, ...partial };
28
28
  }
29
+ const VARIANT_SUFFIX_PATTERN = /\s*\((gold|baseline)\)\s*$/i;
30
+ export function splitTaskVariant(taskId) {
31
+ const match = VARIANT_SUFFIX_PATTERN.exec(taskId);
32
+ if (!match)
33
+ return { task: taskId, variant: null };
34
+ return {
35
+ task: taskId.slice(0, match.index).trim(),
36
+ variant: match[1].toLowerCase(),
37
+ };
38
+ }
39
+ /**
40
+ * Resolve the `mode` axis value for an artifact association: prefer the
41
+ * per-task variant when present, fall back to the high-level evaluation mode.
42
+ * Mirrors `mode = variant ?? defaultMode` in `slim-report-summary#slimJudgments`.
43
+ */
44
+ export function resolveVariantMode(taskId, defaultMode) {
45
+ const { task, variant } = splitTaskVariant(taskId);
46
+ return { mode: variant ?? defaultMode, task };
47
+ }
@@ -41,7 +41,7 @@ export type ArtifactMime = "application/json" | "application/x-ndjson" | "text/m
41
41
  */
42
42
  export type ArtifactTruncationPolicy = "reject" | "trailing-truncate" | "fielded-truncate" | "trial-oversize";
43
43
  /** The union of every artifact type known to AILF. */
44
- export type ArtifactType = "runManifest" | "scoreSummary" | "pipelineResult" | "pipelineContext" | "documentManifest" | "prComment" | "readinessReport" | "reportSnapshot" | "autoComparison" | "gapReport" | "sinkResults" | "callbackRequest" | "callbackResponse" | "configSnapshot" | "evalConfigGenerated" | "comparisonReport" | "discoveryReport" | "failureModes" | "taskDefinitions" | "renderedPrompts" | "rawResults" | "testOutputs" | "graderPrompts" | "graderJudgments" | "traces" | "evalResults";
44
+ export type ArtifactType = "runManifest" | "scoreSummary" | "pipelineResult" | "pipelineContext" | "documentManifest" | "prComment" | "readinessReport" | "reportSnapshot" | "autoComparison" | "gapReport" | "sinkResults" | "callbackRequest" | "callbackResponse" | "configSnapshot" | "evalConfigGenerated" | "comparisonReport" | "discoveryReport" | "failureModes" | "taskDefinitions" | "renderedPrompts" | "rawResults" | "testOutputs" | "graderPrompts" | "graderJudgments" | "traces";
45
45
  /**
46
46
  * Result of parsing a per-entry key into a sanitized filename component.
47
47
  * Success carries the sanitized value; failure carries a reason for 4xx responses.
@@ -333,7 +333,7 @@ function buildDescriptor(input) {
333
333
  };
334
334
  }
335
335
  // ---------------------------------------------------------------------------
336
- // The registry — 21 live descriptors + 1 deprecated (evalResults)
336
+ // The registry — 21 live descriptors
337
337
  // ---------------------------------------------------------------------------
338
338
  /**
339
339
  * The canonical artifact descriptor for every artifact type. Iterate with
@@ -630,23 +630,6 @@ export const ARTIFACT_REGISTRY = {
630
630
  capBytes: 10_000_000,
631
631
  truncation: "trial-oversize",
632
632
  }),
633
- /**
634
- * @deprecated Emit removed in W0050 (no producer calls `emit("evalResults")`
635
- * any more — `emit-eval-results.ts` decomposes the promptfoo aggregate into
636
- * per-entry rawResults / renderedPrompts / graderPrompts / graderJudgments
637
- * instead). Descriptor retained for read-compat on pre-W0050 reports until
638
- * W0052 removes it entirely. No code path should re-introduce emission.
639
- */
640
- evalResults: buildDescriptor({
641
- type: "evalResults",
642
- slug: "eval-results",
643
- layout: "bulk",
644
- axes: ["run"],
645
- entrySchema: unknownEntry,
646
- mime: "application/json",
647
- capBytes: 10_000_000,
648
- optional: true,
649
- }),
650
633
  };
651
634
  /** All artifact types in declaration order. */
652
635
  export const ARTIFACT_TYPES = Object.keys(ARTIFACT_REGISTRY);
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Batch signed-URL types (D0033 / W0052 M3).
3
+ *
4
+ * Contract shared between the API Gateway and Studio for the two batch
5
+ * endpoints that amortise signing over many artifacts in a single round-trip:
6
+ *
7
+ * POST /v1/runs/:runId/artifacts/batch/read-urls
8
+ * POST /v1/runs/:runId/artifacts/batch/upload-urls
9
+ *
10
+ * The batch shape is intentionally symmetric: request carries the set of
11
+ * artifact types and (for per-entry layouts) the entry keys; response carries
12
+ * signed URLs keyed by type and entry key. For bulk layouts the response key
13
+ * is the empty string, since bulk artifacts have no entry dimension.
14
+ *
15
+ * Validation is all-or-nothing per AC 4 — a single malformed entry key, an
16
+ * unknown artifact type, or a missing keys list for a per-entry type causes
17
+ * the whole request to 400 with zero signed URLs emitted.
18
+ *
19
+ * @see docs/design-docs/unified-run-artifacts.md § M3
20
+ * @see docs/decisions/D0033-unified-run-anchored-artifact-capture.md
21
+ */
22
+ import type { ArtifactType } from "./artifact-registry.js";
23
+ /** The empty entry-key sentinel used for bulk responses. */
24
+ export declare const BULK_ENTRY_KEY: "";
25
+ /**
26
+ * Batch request body.
27
+ *
28
+ * - `types` — the artifact types to sign. Must be non-empty. Unknown types
29
+ * cause a 400.
30
+ * - `keys` — per-type arrays of entry keys. Required for per-entry layouts;
31
+ * omitted (or an empty array) for bulk layouts. Extra keys for a bulk type
32
+ * are rejected; missing keys for a per-entry type are rejected.
33
+ */
34
+ export interface BatchSignRequest {
35
+ readonly types: readonly ArtifactType[];
36
+ readonly keys?: Partial<Record<ArtifactType, readonly string[]>>;
37
+ }
38
+ /** One signed read URL in a batch read response. */
39
+ export interface BatchSignedReadUrl {
40
+ readonly url: string;
41
+ readonly path: string;
42
+ readonly expiresIn: number;
43
+ }
44
+ /** One signed upload URL in a batch upload response. */
45
+ export interface BatchSignedUploadUrl {
46
+ readonly url: string;
47
+ readonly method: "PUT";
48
+ readonly path: string;
49
+ readonly expiresIn: number;
50
+ readonly requiredHeaders: Readonly<Record<string, string>>;
51
+ }
52
+ /**
53
+ * Batch response. Outer map is keyed by artifact type; inner map is keyed by
54
+ * entry key (empty string for bulk layouts). All types requested in the
55
+ * request body are present in the response, even when `keys[type]` was empty.
56
+ */
57
+ export interface BatchSignReadResponse {
58
+ readonly bucket: string;
59
+ readonly urls: Partial<Record<ArtifactType, Readonly<Record<string, BatchSignedReadUrl>>>>;
60
+ }
61
+ export interface BatchSignUploadResponse {
62
+ readonly bucket: string;
63
+ readonly urls: Partial<Record<ArtifactType, Readonly<Record<string, BatchSignedUploadUrl>>>>;
64
+ }
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Batch signed-URL types (D0033 / W0052 M3).
3
+ *
4
+ * Contract shared between the API Gateway and Studio for the two batch
5
+ * endpoints that amortise signing over many artifacts in a single round-trip:
6
+ *
7
+ * POST /v1/runs/:runId/artifacts/batch/read-urls
8
+ * POST /v1/runs/:runId/artifacts/batch/upload-urls
9
+ *
10
+ * The batch shape is intentionally symmetric: request carries the set of
11
+ * artifact types and (for per-entry layouts) the entry keys; response carries
12
+ * signed URLs keyed by type and entry key. For bulk layouts the response key
13
+ * is the empty string, since bulk artifacts have no entry dimension.
14
+ *
15
+ * Validation is all-or-nothing per AC 4 — a single malformed entry key, an
16
+ * unknown artifact type, or a missing keys list for a per-entry type causes
17
+ * the whole request to 400 with zero signed URLs emitted.
18
+ *
19
+ * @see docs/design-docs/unified-run-artifacts.md § M3
20
+ * @see docs/decisions/D0033-unified-run-anchored-artifact-capture.md
21
+ */
22
+ /** The empty entry-key sentinel used for bulk responses. */
23
+ export const BULK_ENTRY_KEY = "";
@@ -16,9 +16,9 @@ export * from "./ports/index.js";
16
16
  export * from "./services/index.js";
17
17
  export * from "./examples/index.js";
18
18
  export * from "./artifact-registry.js";
19
+ export * from "./batch-signing.js";
19
20
  export { defineConfig, defineFeatures, defineModeBase, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./config-helpers.js";
20
21
  export type { PricingEntry, PromptEntry, SourceEntry, } from "./config-helpers.js";
21
22
  export { env } from "./env-helper.js";
22
- export { NoOpArtifactCollector } from "./artifact-capture/noop-collector.js";
23
23
  export { NoOpArtifactWriter, NotImplementedError, } from "./ports/artifact-writer.js";
24
- export { assoc, type AssocContext } from "./artifact-capture/association.js";
24
+ export { assoc, resolveVariantMode, splitTaskVariant, type AssocContext, type TaskVariantSplit, } from "./artifact-capture/association.js";
@@ -16,11 +16,11 @@ export * from "./ports/index.js";
16
16
  export * from "./services/index.js";
17
17
  export * from "./examples/index.js";
18
18
  export * from "./artifact-registry.js";
19
+ export * from "./batch-signing.js";
19
20
  // ---------------------------------------------------------------------------
20
21
  // Architecture overhaul — Phase 0 helpers
21
22
  // ---------------------------------------------------------------------------
22
23
  export { defineConfig, defineFeatures, defineModeBase, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./config-helpers.js";
23
24
  export { env } from "./env-helper.js";
24
- export { NoOpArtifactCollector } from "./artifact-capture/noop-collector.js";
25
25
  export { NoOpArtifactWriter, NotImplementedError, } from "./ports/artifact-writer.js";
26
- export { assoc } from "./artifact-capture/association.js";
26
+ export { assoc, resolveVariantMode, splitTaskVariant, } from "./artifact-capture/association.js";
@@ -13,12 +13,12 @@
13
13
  */
14
14
  import type { RunId } from "../types/branded-ids.js";
15
15
  import type { DebugOptions, EvalMode, PluginRegistry } from "../types/index.js";
16
- import type { ArtifactCollector } from "./artifact-collector.js";
17
16
  import type { ArtifactWriter } from "./artifact-writer.js";
18
17
  import type { CacheStore } from "./cache-store.js";
19
18
  import type { DocFetcher } from "./doc-fetcher.js";
20
19
  import type { EvalRunner } from "./eval-runner.js";
21
20
  import type { Logger } from "./logger.js";
21
+ import type { ProgressReporter } from "./progress-reporter.js";
22
22
  import type { TaskSource } from "./task-source.js";
23
23
  /**
24
24
  * Resolved pipeline configuration — the typed, validated result of
@@ -153,19 +153,10 @@ export interface ResolvedConfig {
153
153
  apiKey?: string;
154
154
  /** External preset file paths or npm package names to load */
155
155
  presets?: string[];
156
- /** Whether artifact capture is enabled for this run (default: false) */
157
- captureEnabled?: boolean;
158
- /** Base directory for capture output (default: results/captures/) */
159
- captureDir?: string;
160
- /** Whether to compress capture output to tar.gz (default: true) */
161
- captureCompress?: boolean;
162
- /** Whether to include mode-specific extra artifacts (default: true) */
163
- captureExtras?: boolean;
164
156
  /**
165
- * D0033 / W0049 — the unified artifact surface. Wired into the writer in
166
- * W0050; consumed by the writer factory to decide whether to attach a
167
- * writer at all, where it writes to, and what to skip. These fields are
168
- * additive and do not replace the legacy `capture*` fields until W0052.
157
+ * D0033 unified artifact surface. Consumed by the writer factory to
158
+ * decide whether to attach a writer at all, where it writes to, and
159
+ * what to skip. Legacy `capture*` fields were retired in W0052.
169
160
  */
170
161
  /** Disables all artifact writers — `--no-artifacts`. */
171
162
  artifactsDisabled?: boolean;
@@ -173,12 +164,8 @@ export interface ResolvedConfig {
173
164
  artifactsDir?: string;
174
165
  /** Run writers in dry-run mode — `--artifacts-dry-run`. */
175
166
  artifactsDryRun?: boolean;
176
- /** Comma-separated artifact types to skip — `--capture-exclude`. */
167
+ /** Comma-separated artifact types to skip — `--artifacts-exclude`. */
177
168
  artifactsExclude?: readonly string[];
178
- /** GCS bucket for capture upload (enables GCS decorator when set) */
179
- captureGcsBucket?: string;
180
- /** GCS object prefix for capture uploads (default: "captures/") */
181
- captureGcsPrefix?: string;
182
169
  /**
183
170
  * GCS bucket for report artifact uploads. Defaults to "ailf-artifacts"
184
171
  * at the composition root — only set this to override (e.g., self-hosted
@@ -221,8 +208,6 @@ export interface AppContext {
221
208
  readonly artifactWriter: ArtifactWriter;
222
209
  /** Evaluation caching (filesystem + optional Content Lake fallback) */
223
210
  readonly cache?: CacheStore;
224
- /** Artifact capture collector (no-op when --capture is not set) */
225
- readonly collector: ArtifactCollector;
226
211
  /** Resolved pipeline configuration */
227
212
  readonly config: ResolvedConfig;
228
213
  /** Documentation context fetcher */
@@ -231,6 +216,13 @@ export interface AppContext {
231
216
  readonly evalRunner: EvalRunner;
232
217
  /** Structured logger */
233
218
  readonly logger: Logger;
219
+ /**
220
+ * Progress reporter — carries `phase-start / phase-progress / phase-complete`
221
+ * events for long-running pipeline spans (W0053). The composition root always
222
+ * wires one; adapters default to `NoOpProgressReporter` for quiet / JSON /
223
+ * test loggers.
224
+ */
225
+ readonly progress: ProgressReporter;
234
226
  /** Plugin registry — mode handlers, assertions, rubric templates, etc. */
235
227
  readonly registry: PluginRegistry;
236
228
  /**
@@ -4,10 +4,8 @@
4
4
  * Ports define the contracts between the domain kernel and the outside world.
5
5
  * Adapters (in packages/eval) implement these interfaces.
6
6
  */
7
- export type { ArtifactCollector, CaptureFlushResult, CaptureManifest, CaptureManifestEntry, } from "./artifact-collector.js";
8
7
  export type { ArtifactEntry, ArtifactWriter } from "./artifact-writer.js";
9
8
  export { NoOpArtifactWriter } from "./artifact-writer.js";
10
- export type { ArtifactContentDiff, CaptureDiffReport, ComparisonMode, ComparisonOptions, InventoryDiff, JsonDiffEntry, MetadataComparison, ScoreComparison, SecurityScan, TimingComparison, } from "./capture-comparator.js";
11
9
  export type { CacheEntryMetadata, CacheKey, CacheLookupResult, CacheRecordInput, CacheStore, } from "./cache-store.js";
12
10
  export type { ConfigSource } from "./config-source.js";
13
11
  export type { AppContext, ReportSinkPort, ReportStorePort, ResolvedConfig, } from "./context.js";
@@ -16,5 +14,7 @@ export type { EvalRunConfig, EvalRunner } from "./eval-runner.js";
16
14
  export type { CompilationContext, CompileResultAssertion, CompileResultPrompt, CompileResultProvider, CompileResultTestCase, ModeCompileResult, ModeHandler, ModeProviderEntry, ModeRubricConfig, PromptTemplate, } from "./mode-handler.js";
17
15
  export type { Logger } from "./logger.js";
18
16
  export type { PipelineStep } from "./pipeline-step.js";
17
+ export type { ArtifactWriterProgressOptions, PhaseCompleteEvent, PhaseProgressEvent, PhaseStartEvent, ProgressReporter, } from "./progress-reporter.js";
18
+ export { ARTIFACT_EXPORT_PHASE_ID, NoOpProgressReporter, } from "./progress-reporter.js";
19
19
  export type { TaskSource } from "./task-source.js";
20
20
  export { canonicalDocRefLabel, isIdRef, isPathRef, isPerspectiveRef, isSlugRef, isTemplatedAssertion, } from "./task-source.js";
@@ -5,4 +5,5 @@
5
5
  * Adapters (in packages/eval) implement these interfaces.
6
6
  */
7
7
  export { NoOpArtifactWriter } from "./artifact-writer.js";
8
+ export { ARTIFACT_EXPORT_PHASE_ID, NoOpProgressReporter, } from "./progress-reporter.js";
8
9
  export { canonicalDocRefLabel, isIdRef, isPathRef, isPerspectiveRef, isSlugRef, isTemplatedAssertion, } from "./task-source.js";
@@ -0,0 +1,74 @@
1
+ /**
2
+ * Port: ProgressReporter — carries user-visible progress for long-running
3
+ * pipeline phases (artifact export, eval runs, etc.).
4
+ *
5
+ * Events follow a `start / progress (batch) / complete` shape. Producers
6
+ * publish events without formatting; the CLI adapter decides cadence,
7
+ * layout, and whether to render at all.
8
+ *
9
+ * Scope today: the artifact export phase that runs after promptfoo finishes
10
+ * evaluation (W0053). Forward-compatible with future callers — e.g. the eval
11
+ * phase itself once we replace promptfoo's terminal-owned progress bar.
12
+ *
13
+ * @see docs/work-items/W0053-cli-artifact-upload-progress.json
14
+ */
15
+ export interface PhaseStartEvent {
16
+ /** Stable phase identifier. Adapters may use this to disambiguate if phases overlap. */
17
+ phaseId: string;
18
+ /** Human-readable label (e.g. `"Exporting run artifacts"`). */
19
+ label: string;
20
+ /**
21
+ * Optional extra context shown only on the header line (e.g. destination
22
+ * info such as `"local + GCS"`). Kept separate from `label` so the
23
+ * per-progress-line prefix stays short and scannable on every line.
24
+ */
25
+ detail?: string;
26
+ /** Total work expected, when known up front. */
27
+ totalItems?: number;
28
+ /** Total bytes expected, when known up front. */
29
+ totalBytes?: number;
30
+ /** Wall-clock timestamp when the phase started (ms since epoch). */
31
+ startedAt: number;
32
+ }
33
+ export interface PhaseProgressEvent {
34
+ phaseId: string;
35
+ /** Items completed in this batch. Default `1`. */
36
+ items?: number;
37
+ /** Bytes transferred in this batch. Default `0`. */
38
+ bytes?: number;
39
+ /** Label for the most recent item (e.g. artifact path). */
40
+ label?: string;
41
+ }
42
+ export interface PhaseCompleteEvent {
43
+ phaseId: string;
44
+ itemsCompleted: number;
45
+ bytesCompleted: number;
46
+ durationMs: number;
47
+ /** Optional override for the final summary line. */
48
+ summary?: string;
49
+ }
50
+ export interface ProgressReporter {
51
+ phaseStart(event: PhaseStartEvent): void;
52
+ phaseProgress(event: PhaseProgressEvent): void;
53
+ phaseComplete(event: PhaseCompleteEvent): void;
54
+ }
55
+ /** No-op reporter — used when progress rendering is disabled (quiet / JSON logger / tests). */
56
+ export declare class NoOpProgressReporter implements ProgressReporter {
57
+ phaseStart(): void;
58
+ phaseProgress(): void;
59
+ phaseComplete(): void;
60
+ }
61
+ /**
62
+ * Canonical phase identifier for the post-eval artifact export span (W0053).
63
+ * Shared between writer wiring and the pipeline orchestrator so both sides
64
+ * publish to the same phase.
65
+ */
66
+ export declare const ARTIFACT_EXPORT_PHASE_ID = "artifact-export";
67
+ /**
68
+ * Options shared by artifact writer adapters that publish progress events.
69
+ * Kept in the port module so every writer adapter imports the same shape.
70
+ */
71
+ export interface ArtifactWriterProgressOptions {
72
+ reporter: ProgressReporter;
73
+ phaseId: string;
74
+ }
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Port: ProgressReporter — carries user-visible progress for long-running
3
+ * pipeline phases (artifact export, eval runs, etc.).
4
+ *
5
+ * Events follow a `start / progress (batch) / complete` shape. Producers
6
+ * publish events without formatting; the CLI adapter decides cadence,
7
+ * layout, and whether to render at all.
8
+ *
9
+ * Scope today: the artifact export phase that runs after promptfoo finishes
10
+ * evaluation (W0053). Forward-compatible with future callers — e.g. the eval
11
+ * phase itself once we replace promptfoo's terminal-owned progress bar.
12
+ *
13
+ * @see docs/work-items/W0053-cli-artifact-upload-progress.json
14
+ */
15
+ /** No-op reporter — used when progress rendering is disabled (quiet / JSON logger / tests). */
16
+ export class NoOpProgressReporter {
17
+ phaseStart() { }
18
+ phaseProgress() { }
19
+ phaseComplete() { }
20
+ }
21
+ /**
22
+ * Canonical phase identifier for the post-eval artifact export span (W0053).
23
+ * Shared between writer wiring and the pipeline orchestrator so both sides
24
+ * publish to the same phase.
25
+ */
26
+ export const ARTIFACT_EXPORT_PHASE_ID = "artifact-export";
@@ -15,6 +15,7 @@
15
15
  * @see docs/decisions/D0033-unified-run-anchored-artifact-capture.md (§ M7)
16
16
  * @see docs/work-items/W0051-report-slim-down-manifest-preview-hooks.json
17
17
  */
18
+ import { splitTaskVariant } from "../artifact-capture/association.js";
18
19
  import { ARTIFACT_REGISTRY } from "../artifact-registry.js";
19
20
  /**
20
21
  * Transform a full pipeline `ScoreSummary` into its slim Report counterpart.
@@ -49,22 +50,6 @@ export function buildSlimReportSummary(summary, mode) {
49
50
  // ---------------------------------------------------------------------------
50
51
  // Judgments — axes {mode, task, model, grader}
51
52
  // ---------------------------------------------------------------------------
52
- /**
53
- * Variant-suffix stripper. Judgments' `taskId` today carries `(gold)` /
54
- * `(baseline)` suffixes that encode the pipeline mode. We strip the suffix
55
- * to build the canonical `task` axis value and use the caller-supplied
56
- * `mode` for the `mode` axis — matching how `formatEntryKey` is computed
57
- * at producer emit time.
58
- */
59
- function splitTaskVariant(taskId) {
60
- const match = /\s*\((gold|baseline)\)\s*$/i.exec(taskId);
61
- if (!match)
62
- return { task: taskId, variant: null };
63
- return {
64
- task: taskId.slice(0, match.index).trim(),
65
- variant: match[1].toLowerCase(),
66
- };
67
- }
68
53
  function slimJudgments(full, defaultMode) {
69
54
  const descriptor = ARTIFACT_REGISTRY.graderJudgments;
70
55
  const formatKey = descriptor.formatEntryKey;
@@ -120,10 +120,6 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
120
120
  allowedOrigins: config.allowedOrigins,
121
121
  searchMode: config.searchMode ?? "open",
122
122
  concurrency: config.concurrency,
123
- captureEnabled: false,
124
- captureDir: undefined,
125
- captureCompress: true,
126
- captureExtras: true,
127
123
  remote: false,
128
124
  apiUrl: "https://ailf-api.sanity.build",
129
125
  presets: config.presets,
@@ -0,0 +1,35 @@
1
+ /**
2
+ * ConsoleProgressReporter — the default terminal adapter for the
3
+ * ProgressReporter port (W0053).
4
+ *
5
+ * Renders phase-start / phase-progress / phase-complete as human-readable
6
+ * lines on stdout. Progress lines are rate-limited so a high-frequency
7
+ * producer (one event per artifact) never floods the terminal; the adapter
8
+ * flushes at most once per `tickMs` (default 1s) and always flushes the
9
+ * final line on `phaseComplete`.
10
+ *
11
+ * `verbose: true` emits one line per item — matches the CLI's existing
12
+ * `--verbose` semantics without re-rendering the rate-limited line twice.
13
+ */
14
+ import type { PhaseCompleteEvent, PhaseProgressEvent, PhaseStartEvent, ProgressReporter } from "../../_vendor/ailf-core/index.d.ts";
15
+ export interface ConsoleProgressReporterOptions {
16
+ /** Minimum interval between rate-limited progress lines, in ms. Default 1000. */
17
+ tickMs?: number;
18
+ /** Emit one line per item rather than rate-limiting. */
19
+ verbose?: boolean;
20
+ /** Sink for output (tests override). Defaults to `console.log`. */
21
+ write?: (line: string) => void;
22
+ /** Clock (tests override). Defaults to `Date.now`. */
23
+ now?: () => number;
24
+ }
25
+ export declare class ConsoleProgressReporter implements ProgressReporter {
26
+ private readonly tickMs;
27
+ private readonly verbose;
28
+ private readonly write;
29
+ private readonly now;
30
+ private readonly phases;
31
+ constructor(options?: ConsoleProgressReporterOptions);
32
+ phaseStart(event: PhaseStartEvent): void;
33
+ phaseProgress(event: PhaseProgressEvent): void;
34
+ phaseComplete(event: PhaseCompleteEvent): void;
35
+ }
@@ -0,0 +1,110 @@
1
+ /**
2
+ * ConsoleProgressReporter — the default terminal adapter for the
3
+ * ProgressReporter port (W0053).
4
+ *
5
+ * Renders phase-start / phase-progress / phase-complete as human-readable
6
+ * lines on stdout. Progress lines are rate-limited so a high-frequency
7
+ * producer (one event per artifact) never floods the terminal; the adapter
8
+ * flushes at most once per `tickMs` (default 1s) and always flushes the
9
+ * final line on `phaseComplete`.
10
+ *
11
+ * `verbose: true` emits one line per item — matches the CLI's existing
12
+ * `--verbose` semantics without re-rendering the rate-limited line twice.
13
+ */
14
/**
 * Console implementation of the progress-reporter callbacks.
 *
 * Keeps one running-state record per phase (keyed by `phaseId`) between
 * `phaseStart` and `phaseComplete`, and prints human-readable lines through
 * the configured `write` sink.
 */
export class ConsoleProgressReporter {
    /** Minimum gap, in ms, between two rate-limited progress lines. */
    tickMs;
    /** When true, every progress event prints its own line. */
    verbose;
    /** Output sink; called once per rendered line. */
    write;
    /** Clock returning the current time in ms. */
    now;
    /** Running state for phases that have started but not yet completed. */
    phases = new Map();
    /**
     * @param options Optional overrides; unset fields fall back to
     *   `tickMs = 1000`, `verbose = false`, `console.log` and `Date.now`.
     */
    constructor(options = {}) {
        this.tickMs = options.tickMs ?? 1000;
        this.verbose = options.verbose ?? false;
        this.write = options.write ?? ((line) => console.log(line));
        this.now = options.now ?? (() => Date.now());
    }
    /**
     * Registers a new phase and prints a blank line followed by its header.
     *
     * @param event Phase-start event; `totalItems`, `totalBytes` and `detail`
     *   are treated as optional.
     */
    phaseStart(event) {
        this.phases.set(event.phaseId, {
            phaseId: event.phaseId,
            label: event.label,
            totalItems: event.totalItems,
            totalBytes: event.totalBytes,
            startedAt: event.startedAt,
            itemsCompleted: 0,
            bytesCompleted: 0,
            // Seeding with the start time means the first rate-limited line
            // cannot appear before `tickMs` has elapsed since phase start.
            lastFlushAt: event.startedAt,
        });
        const total = event.totalItems !== undefined ? ` (${event.totalItems} items)` : "";
        const detail = event.detail ? ` → ${event.detail}` : "";
        this.write("");
        this.write(` ➜ ${event.label}${detail}${total}…`);
    }
    /**
     * Accumulates progress for a phase and prints either one line per event
     * (verbose) or at most one line per `tickMs` (default).
     *
     * Events for a phase that was never started are ignored.
     *
     * @param event Phase-progress event; `items` defaults to 1 and `bytes`
     *   to 0 when absent.
     */
    phaseProgress(event) {
        const state = this.phases.get(event.phaseId);
        if (!state)
            return;
        state.itemsCompleted += event.items ?? 1;
        state.bytesCompleted += event.bytes ?? 0;
        if (this.verbose) {
            const item = event.label ?? `item ${state.itemsCompleted}`;
            const bytes = event.bytes ?? 0;
            this.write(` · ${progressPrefix(state)} ${item} (${formatBytes(bytes)})`);
            return;
        }
        const now = this.now();
        if (now - state.lastFlushAt < this.tickMs)
            return;
        state.lastFlushAt = now;
        this.write(` ${renderProgressLine(state, now)}`);
    }
    /**
     * Prints the closing summary line for a phase and forgets its state.
     *
     * Totals are the larger of the reporter's running count and the count
     * carried by the event. Missing event counters are treated as 0 so the
     * summary never renders `NaN`; a missing `durationMs` falls back to the
     * time elapsed since the recorded phase start (0 when the phase is
     * unknown).
     *
     * @param event Phase-complete event; an explicit `summary` replaces the
     *   generated "items · bytes · elapsed" text.
     */
    phaseComplete(event) {
        const state = this.phases.get(event.phaseId);
        // `Math.max(x, undefined)` is NaN, so default absent counters to 0.
        const itemsCompleted = Math.max(state?.itemsCompleted ?? 0, event.itemsCompleted ?? 0);
        const bytesCompleted = Math.max(state?.bytesCompleted ?? 0, event.bytesCompleted ?? 0);
        const durationMs = event.durationMs ?? (state ? this.now() - state.startedAt : 0);
        // Without a matching phaseStart there is no label; show the id instead.
        const label = state?.label ?? event.phaseId;
        const summary = event.summary ??
            `${itemsCompleted} item${itemsCompleted === 1 ? "" : "s"} · ${formatBytes(bytesCompleted)} · ${formatDuration(durationMs)} elapsed`;
        this.write(` ✓ ${label} — ${summary}`);
        this.phases.delete(event.phaseId);
    }
}
76
+ // ---------------------------------------------------------------------------
77
+ // Helpers
78
+ // ---------------------------------------------------------------------------
79
/**
 * Builds the leading "<label>:" fragment that progress lines start with, so
 * each line names its phase on its own.
 *
 * @param state Phase state; only `label` is read.
 * @returns The phase label followed by a colon.
 */
function progressPrefix(state) {
    const { label } = state;
    return `${label}:`;
}
86
/**
 * Renders one rate-limited progress line: the phase prefix, the item count
 * (as "done/total" when the total is known), the bytes so far and the time
 * elapsed since the phase started.
 *
 * @param state Phase state holding the running counters and `startedAt`.
 * @param now Current time in ms, on the same clock as `state.startedAt`.
 * @returns The formatted line without leading indentation.
 */
function renderProgressLine(state, now) {
    const elapsedText = formatDuration(now - state.startedAt);
    let countText;
    if (state.totalItems === undefined) {
        countText = String(state.itemsCompleted);
    }
    else {
        countText = `${state.itemsCompleted}/${state.totalItems}`;
    }
    const prefix = progressPrefix(state);
    const sizeText = formatBytes(state.bytesCompleted);
    return `${prefix} ${countText} · ${sizeText} · ${elapsedText} elapsed`;
}
93
/**
 * Formats a byte count using binary (1024-based) units.
 *
 * Values below 1024 are printed verbatim with a " B" suffix; KB and MB use
 * one decimal place and GB uses two. The unit is chosen after rounding, so a
 * value that would round up to "1024.0" of one unit is shown in the next unit
 * instead (for example 1048575 bytes renders as "1.0 MB", not "1024.0 KB").
 *
 * @param bytes Number of bytes.
 * @returns Text such as "512 B", "1.5 KB", "3.2 MB" or "1.25 GB".
 */
function formatBytes(bytes) {
    const STEP = 1024;
    if (bytes < STEP)
        return `${bytes} B`;
    const kb = bytes / STEP;
    // Compare the rounded value so the displayed number never reaches 1024.0.
    if (Number(kb.toFixed(1)) < STEP)
        return `${kb.toFixed(1)} KB`;
    const mb = kb / STEP;
    if (Number(mb.toFixed(1)) < STEP)
        return `${mb.toFixed(1)} MB`;
    return `${(mb / STEP).toFixed(2)} GB`;
}
102
/**
 * Formats a duration given in milliseconds.
 *
 * - below one second: "<ms>ms"
 * - below one minute: seconds with one decimal, e.g. "12.3s"
 * - otherwise: whole minutes and seconds, e.g. "2m5s"
 *
 * Rounding is applied to the total before it is split into parts, so the
 * seconds component is always in 0–59 (119600 ms renders as "2m0s", never
 * "1m60s"), and a value that would round to "60.0s" is shown as "1m0s".
 *
 * @param ms Duration in milliseconds.
 * @returns The formatted duration.
 */
function formatDuration(ms) {
    if (ms < 1000)
        return `${ms}ms`;
    const seconds = ms / 1000;
    // Decide on the rounded value so "60.0s" is never displayed.
    if (Number(seconds.toFixed(1)) < 60)
        return `${seconds.toFixed(1)}s`;
    const totalSeconds = Math.round(seconds);
    const minutes = Math.floor(totalSeconds / 60);
    const remainder = totalSeconds % 60;
    return `${minutes}m${remainder}s`;
}
@@ -8,7 +8,7 @@
8
8
  * Endpoints:
9
9
  * - Bulk: GET {apiBaseUrl}/v1/runs/{runId}/artifacts/{type}/upload-url
10
10
  * - Per-entry: GET {apiBaseUrl}/v1/runs/{runId}/artifacts/{type}/{entryKey}/upload-url
11
- * - Manifest: GET {apiBaseUrl}/v1/runs/{runId}/artifacts/manifest/upload-url
11
+ * - Manifest: GET {apiBaseUrl}/v1/runs/{runId}/artifacts/upload-url
12
12
  *
13
13
  * ## W0049 API surface
14
14
  *
@@ -28,6 +28,7 @@
28
28
  * @see docs/decisions/D0033-unified-run-anchored-artifact-capture.md
29
29
  */
30
30
  import { type ArtifactEntry, type ArtifactRef, type ArtifactType, type ArtifactWriter, type AssociationValues, type RunId, type RunManifest } from "../_vendor/ailf-core/index.d.ts";
31
+ import { type UploadMetricsSink } from "./upload-metrics.js";
31
32
  export interface ApiGatewayArtifactWriterOptions {
32
33
  /** Base URL of the API gateway (e.g., "https://ailf-api.sanity.build"). */
33
34
  apiBaseUrl: string;
@@ -35,9 +36,15 @@ export interface ApiGatewayArtifactWriterOptions {
35
36
  apiKey: string;
36
37
  /** GCS bucket name — included in the returned ArtifactRef. */
37
38
  bucket: string;
39
+ /**
40
+ * W0056 spike — optional metrics sink that receives per-phase timing events.
41
+ * Defaults to a no-op so the hot path stays free when metrics are off.
42
+ */
43
+ metrics?: UploadMetricsSink;
38
44
  }
39
45
  export declare class ApiGatewayArtifactWriter implements ArtifactWriter {
40
46
  private readonly options;
47
+ private readonly metrics;
41
48
  constructor(options: ApiGatewayArtifactWriterOptions);
42
49
  emit<T extends ArtifactType>(type: T, association: AssociationValues, payload: unknown): Promise<ArtifactRef | null>;
43
50
  appendNdjson(): Promise<ArtifactRef | null>;