@sanity/ailf 2.4.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -164,8 +164,25 @@ export interface ResolvedConfig {
164
164
  captureGcsBucket?: string;
165
165
  /** GCS object prefix for capture uploads (default: "captures/") */
166
166
  captureGcsPrefix?: string;
167
- /** GCS bucket for report artifact uploads — enables ArtifactUploader (D0030) */
167
+ /**
168
+ * GCS bucket for report artifact uploads. Defaults to "ailf-artifacts"
169
+ * at the composition root — only set this to override (e.g., self-hosted
170
+ * deployment with a different bucket). Read access is governed by the
171
+ * gateway's signing credentials, so alternate bucket names require
172
+ * reconfiguring the gateway as well (D0030).
173
+ */
168
174
  artifactGcsBucket?: string;
175
+ /**
176
+ * Controls whether the ArtifactUploader is constructed.
177
+ *
178
+ * - `undefined` (default): auto — construct when credentials are available
179
+ * (ADC for direct GCS, or AILF_API_KEY for gateway-signed URLs).
180
+ * - `true`: force-enable — still a no-op if no credentials are present (P5).
181
+ * - `false`: force-disable — skip artifact upload even when credentials exist.
182
+ *
183
+ * Sourced from AILF_ARTIFACT_UPLOAD env var or `artifactUpload` in ailf.config.ts.
184
+ */
185
+ artifactUpload?: boolean;
169
186
  }
170
187
  /**
171
188
  * Application context — the complete dependency carrier.
@@ -15,8 +15,18 @@
15
15
  import { existsSync } from "fs";
16
16
  import { resolve } from "path";
17
17
  import { PipelineRequestSchema, } from "../../_vendor/ailf-core/index.js";
18
+ import { LEGACY_EVAL_MODE_ALIASES } from "../../_vendor/ailf-shared/index.js";
18
19
  import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
19
20
  import { RepoTaskSource } from "../task-sources/repo-task-source.js";
21
+ const LEGACY_LITERACY_VARIANT_SET = new Set(LEGACY_EVAL_MODE_ALIASES);
22
/**
 * Map a raw `config.mode` onto the mode value carried by task definitions.
 *
 * CLI literacy variants (the members of LEGACY_LITERACY_VARIANT_SET, e.g.
 * `"baseline"` or `"full"`) collapse to `"literacy"`; every other value is
 * handed back unchanged.
 */
function resolveCanonicalTaskMode(configMode) {
    if (LEGACY_LITERACY_VARIANT_SET.has(configMode)) {
        return "literacy";
    }
    return configMode;
}
20
30
  // ---------------------------------------------------------------------------
21
31
  // Public API
22
32
  // ---------------------------------------------------------------------------
@@ -33,10 +43,18 @@ import { RepoTaskSource } from "../task-sources/repo-task-source.js";
33
43
  */
34
44
  export async function buildRemoteRequest(options) {
35
45
  const { tasksDir, config } = options;
36
- // 1. Load and validate local tasks
46
+ // 1. Load and validate local tasks, filtered to the requested mode.
47
+ // `config.mode` may be a literacy variant (baseline/agentic/full/observed)
48
+ // — those all map to task mode "literacy". Other modes match 1:1.
37
49
  const taskSource = new RepoTaskSource(tasksDir);
38
50
  const filterOptions = buildFilterOptions(config);
39
- const tasks = (await taskSource.loadTasks(filterOptions)).filter((t) => t.mode === "literacy");
51
+ const allTasks = await taskSource.loadTasks(filterOptions);
52
+ const taskModeFilter = config.mode
53
+ ? resolveCanonicalTaskMode(config.mode)
54
+ : undefined;
55
+ const tasks = taskModeFilter
56
+ ? allTasks.filter((t) => t.mode === taskModeFilter)
57
+ : allTasks;
40
58
  if (tasks.length === 0) {
41
59
  throw new Error("No tasks found after applying filters.\n" +
42
60
  ` Tasks directory: ${tasksDir}\n` +
@@ -145,12 +163,13 @@ export function resolveTasksDir(rootDir, explicitPath) {
145
163
  // Helpers
146
164
  // ---------------------------------------------------------------------------
147
165
  /**
148
- * Convert a LiteracyTaskDefinition to the camelCase inline format expected
166
+ * Convert a GeneralizedTaskDefinition to the camelCase inline format expected
149
167
  * by the API.
150
168
  */
151
169
  function taskToInlineFormat(task) {
152
170
  const inline = {
153
171
  id: task.id,
172
+ mode: task.mode,
154
173
  description: task.title,
155
174
  featureArea: task.area ?? "",
156
175
  assert: task.assertions ?? [],
@@ -166,14 +185,17 @@ function taskToInlineFormat(task) {
166
185
  ...(task.prompt?.vars ?? {}),
167
186
  };
168
187
  }
169
- if (task.docCoverage) {
170
- inline.docCoverage = true;
171
- }
172
- if (task.referenceSolution) {
173
- inline.referenceSolution = task.referenceSolution;
174
- }
175
- if (task.baseline) {
176
- inline.baseline = task.baseline;
188
+ // Literacy-specific fields
189
+ if (task.mode === "literacy") {
190
+ if (task.docCoverage) {
191
+ inline.docCoverage = true;
192
+ }
193
+ if (task.referenceSolution) {
194
+ inline.referenceSolution = task.referenceSolution;
195
+ }
196
+ if (task.baseline) {
197
+ inline.baseline = task.baseline;
198
+ }
177
199
  }
178
200
  if (task.tags?.length) {
179
201
  inline.tags = task.tags;
@@ -0,0 +1,41 @@
1
+ /**
2
+ * ApiGatewayArtifactUploader — uploads report artifacts via the API Gateway.
3
+ *
4
+ * Counterpart to GcsReportArtifactUploader. Used when the CLI runs locally
5
+ * without GCS credentials. Two-step flow:
6
+ *
7
+ * 1. GET {apiBaseUrl}/v1/artifacts/{reportId}/upload-url?type={artifactType}
8
+ * with Authorization: Bearer {apiKey} — returns a signed PUT URL.
9
+ * 2. PUT the JSON to that URL with Content-Type: application/json and
10
+ * x-goog-if-generation-match: 0 (overwrite-protection contract from
11
+ * the gateway's signed URL).
12
+ *
13
+ * The gateway stays out of the data path — Vercel only signs the URL,
14
+ * the artifact bytes go directly to GCS.
15
+ *
16
+ * Design principles:
17
+ * - P5: Non-blocking — any failure returns null and warns, never throws.
18
+ * - Stateless — no client to keep around between calls.
19
+ *
20
+ * @see docs/design-docs/external-artifact-store.md
21
+ * @see docs/decisions/D0030-external-artifact-store.md
22
+ */
23
+ import type { ArtifactRef, ArtifactUploader } from "../_vendor/ailf-core/index.d.ts";
24
+ export interface ApiGatewayUploaderOptions {
25
+ /** Base URL of the API gateway (e.g., "https://api.ailf.sanity.io"). A single trailing slash is stripped before the request path is appended. */
26
+ apiBaseUrl: string;
27
+ /** AILF API key with the `artifact:write` scope. Sent as `Authorization: Bearer <apiKey>` on the signed-URL request. */
28
+ apiKey: string;
29
+ /** GCS bucket name the uploader is configured for. NOTE(review): the implementation shipped in this package fills the returned ArtifactRef from the bucket echoed in the gateway's signed-URL response and does not read this option — confirm which source is intended. */
30
+ bucket: string;
31
+ }
32
+ export declare class ApiGatewayArtifactUploader implements ArtifactUploader {
33
+ private readonly options;
34
+ constructor(options: ApiGatewayUploaderOptions);
35
+ /** JSON-serializes `data` and PUTs it to a gateway-signed URL for `reports/{reportId}/{fileName}`. Resolves to `null` (after a console warning) when `fileName` has no known artifact type or when either HTTP request fails. */ upload(reportId: string, fileName: string, data: unknown): Promise<ArtifactRef | null>;
36
+ /**
37
+ * Fetch a signed upload URL from the gateway. Returns null on any non-2xx
38
+ * response or malformed body so the caller can stay non-blocking.
39
+ */
40
+ private fetchSignedUrl;
41
+ }
@@ -0,0 +1,123 @@
1
+ /**
2
+ * ApiGatewayArtifactUploader — uploads report artifacts via the API Gateway.
3
+ *
4
+ * Counterpart to GcsReportArtifactUploader. Used when the CLI runs locally
5
+ * without GCS credentials. Two-step flow:
6
+ *
7
+ * 1. GET {apiBaseUrl}/v1/artifacts/{reportId}/upload-url?type={artifactType}
8
+ * with Authorization: Bearer {apiKey} — returns a signed PUT URL.
9
+ * 2. PUT the JSON to that URL with Content-Type: application/json and
10
+ * x-goog-if-generation-match: 0 (overwrite-protection contract from
11
+ * the gateway's signed URL).
12
+ *
13
+ * The gateway stays out of the data path — Vercel only signs the URL,
14
+ * the artifact bytes go directly to GCS.
15
+ *
16
+ * Design principles:
17
+ * - P5: Non-blocking — any failure returns null and warns, never throws.
18
+ * - Stateless — no client to keep around between calls.
19
+ *
20
+ * @see docs/design-docs/external-artifact-store.md
21
+ * @see docs/decisions/D0030-external-artifact-store.md
22
+ */
23
+ // ---------------------------------------------------------------------------
24
+ // File-name → artifact-type mapping (mirrors packages/api ARTIFACT_FILES)
25
+ // ---------------------------------------------------------------------------
26
+ /**
27
+ * Reverse map of the API gateway's ARTIFACT_FILES. The uploader port speaks
28
+ * file names; the gateway endpoint speaks artifact types. Keep these in sync
29
+ * with packages/api/src/routes/artifacts.ts.
30
+ */
31
+ const FILE_TO_TYPE = {
32
+ "eval-results.json": "evalResults",
33
+ "grader-prompts.json": "graderPrompts",
34
+ "rendered-prompts.json": "renderedPrompts",
35
+ "task-definitions.json": "taskDefinitions",
36
+ "test-outputs.json": "testOutputs",
37
+ };
38
/**
 * Uploads report artifacts through the API gateway's signed-URL flow:
 * request a signed PUT URL from the gateway, then PUT the JSON directly to
 * that URL.
 *
 * Failure policy (P5): every failure path — unknown file name, payload that
 * cannot be serialized, network error, non-2xx response, malformed gateway
 * body — warns on the console and resolves to `null`.
 */
export class ApiGatewayArtifactUploader {
    options;
    constructor(options) {
        this.options = options;
    }
    /**
     * Serialize `data` as JSON and PUT it to a gateway-signed URL.
     *
     * @param reportId - Report the artifact belongs to.
     * @param fileName - Artifact file name; must be an own key of FILE_TO_TYPE.
     * @param data - JSON-serializable payload.
     * @returns The stored artifact's reference, or `null` when the upload was
     *   skipped or failed.
     */
    async upload(reportId, fileName, data) {
        // Own-property lookup only: a bare index would also resolve inherited
        // keys such as "constructor" or "toString" to truthy values.
        const artifactType = Object.prototype.hasOwnProperty.call(FILE_TO_TYPE, fileName)
            ? FILE_TO_TYPE[fileName]
            : undefined;
        if (!artifactType) {
            console.warn(` ⚠️ Artifact upload skipped (unknown fileName): ${fileName}`);
            return null;
        }
        const objectPath = `reports/${reportId}/${fileName}`;
        try {
            // Serialization happens inside the try: JSON.stringify throws on
            // circular references and BigInt values, and that must not escape.
            const json = JSON.stringify(data);
            if (typeof json !== "string") {
                // `undefined`, functions and symbols serialize to `undefined`.
                console.warn(` ⚠️ Artifact upload skipped (data is not JSON-serializable): ${objectPath}`);
                return null;
            }
            const bytes = Buffer.byteLength(json, "utf-8");
            const signed = await this.fetchSignedUrl(reportId, artifactType);
            if (!signed)
                return null;
            const putRes = await fetch(signed.url, {
                body: json,
                headers: signed.requiredHeaders,
                method: "PUT",
            });
            if (!putRes.ok) {
                console.warn(` ⚠️ Artifact upload failed (non-blocking): ${objectPath} — GCS PUT ${putRes.status} ${putRes.statusText}`);
                return null;
            }
            return {
                bucket: signed.bucket,
                bytes,
                entryCount: extractEntryCount(data),
                path: signed.path,
                store: "gcs",
            };
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            console.warn(` ⚠️ Artifact upload failed (non-blocking): ${objectPath} — ${message}`);
            return null;
        }
    }
    /**
     * Fetch a signed upload URL from the gateway.
     *
     * @param reportId - Report identifier, URL-encoded into the request path.
     * @param artifactType - Gateway artifact type, sent as the `type` query.
     * @returns The validated signed-URL descriptor, or `null` on a non-2xx
     *   response or a body that is not the expected `signed_upload_url`
     *   object (including bodies that are not valid JSON).
     */
    async fetchSignedUrl(reportId, artifactType) {
        const baseUrl = this.options.apiBaseUrl.replace(/\/$/, "");
        const url = `${baseUrl}/v1/artifacts/${encodeURIComponent(reportId)}/upload-url?type=${encodeURIComponent(artifactType)}`;
        const res = await fetch(url, {
            headers: {
                Authorization: `Bearer ${this.options.apiKey}`,
            },
            method: "GET",
        });
        if (!res.ok) {
            console.warn(` ⚠️ Signed-URL request failed: ${res.status} ${res.statusText}`);
            return null;
        }
        let body;
        try {
            body = await res.json();
        }
        catch {
            // A body that is not JSON is just another malformed response.
            body = undefined;
        }
        // `typeof null === "object"`, so null is ruled out explicitly before
        // any property of the body (or of requiredHeaders) is touched.
        const wellFormed = typeof body === "object" &&
            body !== null &&
            body.object === "signed_upload_url" &&
            typeof body.url === "string" &&
            typeof body.path === "string" &&
            typeof body.bucket === "string" &&
            typeof body.requiredHeaders === "object" &&
            body.requiredHeaders !== null;
        if (!wellFormed) {
            console.warn(` ⚠️ Signed-URL response was malformed`);
            return null;
        }
        return {
            bucket: body.bucket,
            method: "PUT",
            object: "signed_upload_url",
            path: body.path,
            requiredHeaders: body.requiredHeaders,
            url: body.url,
        };
    }
}
114
/**
 * Count the keys of `data.entries`, used as `entryCount` metadata on the
 * artifact reference.
 *
 * @param data - Arbitrary artifact payload.
 * @returns The number of own enumerable keys of `data.entries`, or
 *   `undefined` when `data` is not an object or `entries` is not a non-null
 *   object.
 */
function extractEntryCount(data) {
    if (typeof data !== "object" || data === null || !("entries" in data)) {
        return undefined;
    }
    const entries = data.entries;
    // `typeof null === "object"`: without the explicit null check a payload of
    // `{ entries: null }` would reach Object.keys(null) and throw.
    if (typeof entries !== "object" || entries === null) {
        return undefined;
    }
    return Object.keys(entries).length;
}
@@ -16,7 +16,7 @@ import { fileURLToPath } from "url";
16
16
  import { classifyUrls } from "../pipeline/classify-url.js";
17
17
  import { normalizeMode } from "../pipeline/normalize-mode.js";
18
18
  import { assessImpact, buildReverseMapping, } from "../pipeline/reverse-mapping.js";
19
- import { buildAppContext } from "../orchestration/build-app-context.js";
19
+ import { buildAppContext, parseArtifactUploadEnv, } from "../orchestration/build-app-context.js";
20
20
  import { buildStepSequence } from "../orchestration/build-step-sequence.js";
21
21
  import { orchestratePipeline } from "../orchestration/pipeline-orchestrator.js";
22
22
  import { load } from "js-yaml";
@@ -329,6 +329,7 @@ export async function executePipeline(cliOpts) {
329
329
  config.captureGcsBucket ??= process.env.AILF_CAPTURE_GCS_BUCKET;
330
330
  config.captureGcsPrefix ??= process.env.AILF_CAPTURE_GCS_PREFIX;
331
331
  config.artifactGcsBucket ??= process.env.AILF_GCS_ARTIFACT_BUCKET;
332
+ config.artifactUpload ??= parseArtifactUploadEnv(process.env.AILF_ARTIFACT_UPLOAD);
332
333
  // Create AppContext directly from the merged config so adapters
333
334
  // (especially taskSource) are wired from the file config's
334
335
  // taskSourceType — not from CLI defaults.
@@ -15,7 +15,7 @@
15
15
  * @see packages/core/src/ports/context.ts — AppContext interface
16
16
  * @see docs/archive/exec-plans/ports-and-adapters/phase-7-composition-root.md
17
17
  */
18
- import { type AppContext, type AssertionRegistration, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
18
+ import { type AppContext, type ArtifactUploader, type AssertionRegistration, type Logger, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
19
19
  /**
20
20
  * Create a fully wired AppContext from resolved configuration.
21
21
  *
@@ -23,6 +23,22 @@ import { type AppContext, type AssertionRegistration, type ResolvedConfig } from
23
23
  * Swapping an adapter is a one-line change in this function.
24
24
  */
25
25
  export declare function createAppContext(config: ResolvedConfig): AppContext;
26
+ /**
27
+ * Selects an ArtifactUploader implementation based on available credentials.
28
+ *
29
+ * Selection order:
30
+ * 1. config.artifactUpload === false → always skip (explicit opt-out)
31
+ * 2. GOOGLE_APPLICATION_CREDENTIALS or GCLOUD_PROJECT present → direct GCS
32
+ * 3. apiKey + apiUrl present → gateway-signed PUT URL
33
+ * 4. Neither → skip (P5); nothing is thrown, only a debug-level log line
34
+ *
35
+ * `artifactUpload: true` follows the same path as leaving it unset — only an
+ * explicit `false` changes the outcome. Each decision is reported through
+ * `logger.debug`.
+ *
+ * The bucket defaults to DEFAULT_ARTIFACT_BUCKET when not explicitly set —
36
+ * users only need to override for self-hosted deployments with a different
37
+ * bucket (and matching gateway signing credentials).
38
+ *
39
+ * Exported for unit-test access; not part of the public package API.
40
+ *
+ * @param config - Resolved configuration; reads `artifactUpload`,
+ *   `artifactGcsBucket`, `apiKey` and `apiUrl`.
+ * @param logger - Receives one debug message describing the decision.
+ * @returns The selected uploader, or `undefined` when upload is skipped.
+ */
41
+ export declare function createArtifactUploader(config: ResolvedConfig, logger: Logger): ArtifactUploader | undefined;
26
42
  /**
27
43
  * Generic Promptfoo assertion types available to all evaluation modes.
28
44
  *
@@ -17,6 +17,7 @@
17
17
  */
18
18
  import { join } from "node:path";
19
19
  import { InMemoryPluginRegistry, NoOpArtifactCollector, } from "./_vendor/ailf-core/index.js";
20
+ import { ApiGatewayArtifactUploader } from "./artifact-capture/api-gateway-artifact-uploader.js";
20
21
  import { FilesystemArtifactCollector } from "./artifact-capture/filesystem-collector.js";
21
22
  import { GcsArtifactCollector } from "./artifact-capture/gcs-collector.js";
22
23
  import { GcsReportArtifactUploader } from "./artifact-capture/gcs-report-artifact-uploader.js";
@@ -82,10 +83,10 @@ export function createAppContext(config) {
82
83
  : fsCollector;
83
84
  }
84
85
  // Report artifact uploader — uploads structured files to GCS at known
85
- // paths for Studio to fetch via signed URLs (D0030)
86
- const artifactUploader = config.artifactGcsBucket
87
- ? new GcsReportArtifactUploader({ bucket: config.artifactGcsBucket })
88
- : undefined;
86
+ // paths for Studio to fetch via signed URLs (D0030). Auto-detects the
87
+ // right adapter from available credentials; defaults bucket to
88
+ // "ailf-artifacts". Set artifactUpload: false to opt out entirely.
89
+ const artifactUploader = createArtifactUploader(config, logger);
89
90
  return {
90
91
  artifactUploader,
91
92
  cache,
@@ -115,6 +116,53 @@ function createLogger() {
115
116
  process.env.AILF_VERBOSE === "1",
116
117
  });
117
118
  }
119
+ /**
120
+ * Shared GCS bucket for report artifacts. Matches the gateway default at
121
+ * packages/api/src/routes/artifacts.ts — both sides assume ailf-artifacts
122
+ * unless explicitly overridden. The gateway's signing credentials are scoped
123
+ * to this bucket, so alternate names require reconfiguring the gateway.
124
+ */
125
+ const DEFAULT_ARTIFACT_BUCKET = "ailf-artifacts";
126
/**
 * Pick the ArtifactUploader adapter that matches the credentials at hand.
 *
 * Decision order:
 *   1. `config.artifactUpload === false` — explicit opt-out, no uploader.
 *   2. GOOGLE_APPLICATION_CREDENTIALS or GCLOUD_PROJECT is set — upload
 *      straight to GCS.
 *   3. Both `config.apiKey` and `config.apiUrl` are set — upload through
 *      gateway-signed PUT URLs.
 *   4. Otherwise — no uploader (P5: skipped, never an error).
 *
 * The bucket is `config.artifactGcsBucket`, falling back to
 * DEFAULT_ARTIFACT_BUCKET. Every outcome is reported via `logger.debug`.
 *
 * Exported for unit-test access; not part of the public package API.
 */
export function createArtifactUploader(config, logger) {
    // An explicit opt-out beats any credential detection.
    if (config.artifactUpload === false) {
        logger.debug("Artifact upload explicitly disabled via artifactUpload=false");
        return undefined;
    }
    const targetBucket = config.artifactGcsBucket ?? DEFAULT_ARTIFACT_BUCKET;
    // CI / GCP runtime: either env var counts as the user opting in to ADC,
    // so go straight to GCS without the extra gateway hop.
    const adcEnvValues = [
        process.env.GOOGLE_APPLICATION_CREDENTIALS,
        process.env.GCLOUD_PROJECT,
    ];
    if (adcEnvValues.some(Boolean)) {
        logger.debug(`Artifact uploader: GcsReportArtifactUploader (direct GCS via ADC, bucket=${targetBucket})`);
        return new GcsReportArtifactUploader({ bucket: targetBucket });
    }
    // Local dev: without both gateway settings there is nothing left to try.
    const gatewayConfigured = Boolean(config.apiKey && config.apiUrl);
    if (!gatewayConfigured) {
        logger.debug("Artifact upload skipped: no GCS credentials or AILF_API_KEY available");
        return undefined;
    }
    // Signed PUT URLs come from the API gateway, so no GCS credentials needed.
    logger.debug(`Artifact uploader: ApiGatewayArtifactUploader (signed URL via ${config.apiUrl}, bucket=${targetBucket})`);
    return new ApiGatewayArtifactUploader({
        apiBaseUrl: config.apiUrl,
        apiKey: config.apiKey,
        bucket: targetBucket,
    });
}
118
166
  function createCache(config) {
119
167
  const local = new FilesystemCache(config.rootDir);
120
168
  if (config.noRemoteCache)
@@ -18,6 +18,14 @@ import type { ResolvedOptions } from "../commands/pipeline-action.js";
18
18
  * are derived (e.g., areas from areaOption).
19
19
  */
20
20
  export declare function mapToResolvedConfig(opts: ResolvedOptions, rootDir: string): ResolvedConfig;
21
+ /**
22
+ * Parse the AILF_ARTIFACT_UPLOAD env var into a tri-state.
23
+ *
24
+ * - "0" | "false" → false (force-disable)
25
+ * - "1" | "true" → true (force-enable; still a no-op without credentials)
26
+ * - unset | other → undefined (auto-detect from credentials)
27
+ *
+ * Matching ignores surrounding whitespace and letter case, so " FALSE " is
+ * read as false. The empty string falls under "other".
+ *
+ * @param value - Raw environment value, or `undefined` when the variable is unset.
+ * @returns The tri-state described above.
+ */
28
+ export declare function parseArtifactUploadEnv(value: string | undefined): boolean | undefined;
21
29
  /**
22
30
  * Build an AppContext from legacy ResolvedOptions.
23
31
  *
@@ -85,8 +85,26 @@ export function mapToResolvedConfig(opts, rootDir) {
85
85
  captureGcsBucket: process.env.AILF_CAPTURE_GCS_BUCKET,
86
86
  captureGcsPrefix: process.env.AILF_CAPTURE_GCS_PREFIX,
87
87
  artifactGcsBucket: process.env.AILF_GCS_ARTIFACT_BUCKET,
88
+ artifactUpload: parseArtifactUploadEnv(process.env.AILF_ARTIFACT_UPLOAD),
88
89
  };
89
90
  }
91
/**
 * Turn the raw AILF_ARTIFACT_UPLOAD value into a tri-state flag.
 *
 * The value is trimmed and lower-cased before matching:
 *   - "0" or "false" → false (force-disable)
 *   - "1" or "true"  → true (force-enable; still a no-op without credentials)
 *   - unset or anything else → undefined (auto-detect from credentials)
 */
export function parseArtifactUploadEnv(value) {
    if (value === undefined) {
        return undefined;
    }
    switch (value.trim().toLowerCase()) {
        case "0":
        case "false":
            return false;
        case "1":
        case "true":
            return true;
        default:
            return undefined;
    }
}
90
108
  /**
91
109
  * Build an AppContext from legacy ResolvedOptions.
92
110
  *
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "2.4.0",
3
+ "version": "2.6.0",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"