@sanity/ailf 3.1.1 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Shared preamble for uploading an artifact payload from an `ArtifactWriter`.
3
+ *
4
+ * All three `ArtifactWriter` implementations (direct GCS, API Gateway, batching
5
+ * API Gateway) must apply the same `redact → serialize → bytecount` pipeline so
6
+ * secrets are stripped before leaving the process. Routing each writer through
7
+ * this helper prevents drift — any future writer that skips the helper would
8
+ * fail the contract test in
9
+ * `src/__tests__/artifact-upload-redaction.test.ts`.
10
+ *
11
+ * NDJSON streaming is **not** handled here — each row is redacted independently
12
+ * by the NDJSON writer path before being concatenated into a part body.
13
+ */
14
+ import type { ArtifactMime } from "../_vendor/ailf-core/index.d.ts";
15
+ export interface PreparedUploadBody {
16
+ readonly body: string;
17
+ readonly bytes: number;
18
+ }
19
+ /**
20
+ * Redact, serialize, and size `payload` for upload.
21
+ *
22
+ * Serialization branches on `mime`:
23
+ * - `application/json` (and anything else JSON-shaped, including the
24
+ * single-shot side of `application/x-ndjson`) → `JSON.stringify`.
25
+ * - `text/markdown` / `application/yaml` → coerce to string via `String()`.
26
+ */
27
+ export declare function prepareUploadBody(payload: unknown, mime: ArtifactMime): PreparedUploadBody;
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Shared preamble for uploading an artifact payload from an `ArtifactWriter`.
3
+ *
4
+ * All three `ArtifactWriter` implementations (direct GCS, API Gateway, batching
5
+ * API Gateway) must apply the same `redact → serialize → bytecount` pipeline so
6
+ * secrets are stripped before leaving the process. Routing each writer through
7
+ * this helper prevents drift — any future writer that skips the helper would
8
+ * fail the contract test in
9
+ * `src/__tests__/artifact-upload-redaction.test.ts`.
10
+ *
11
+ * NDJSON streaming is **not** handled here — each row is redacted independently
12
+ * by the NDJSON writer path before being concatenated into a part body.
13
+ */
14
+ import { redactArtifactData } from "./redact-artifact.js";
15
+ /**
16
+ * Redact, serialize, and size `payload` for upload.
17
+ *
18
+ * Serialization branches on `mime`:
19
+ * - `application/json` (and anything else JSON-shaped, including the
20
+ * single-shot side of `application/x-ndjson`) → `JSON.stringify`.
21
+ * - `text/markdown` / `application/yaml` → coerce to string via `String()`.
22
+ */
23
+ export function prepareUploadBody(payload, mime) {
24
+ const redacted = redactArtifactData(payload);
25
+ const body = serializeForMime(redacted, mime);
26
+ const bytes = Buffer.byteLength(body, "utf-8");
27
+ return { body, bytes };
28
+ }
29
+ function serializeForMime(payload, mime) {
30
+ if (mime === "text/markdown" || mime === "application/yaml") {
31
+ if (typeof payload === "string")
32
+ return payload;
33
+ return String(payload ?? "");
34
+ }
35
+ return JSON.stringify(payload);
36
+ }
@@ -5,7 +5,7 @@
5
5
  * task files. The generated files are ready-to-edit starting points —
6
6
  * not live evaluation tasks.
7
7
  *
8
- * TypeScript output (default) uses define* helpers from @sanity/ailf-core
8
+ * TypeScript output (default) uses define* helpers from @sanity/ailf
9
9
  * for full IDE autocomplete and type checking. YAML output serializes the
10
10
  * parsed task data. JSON output is a plain serialization of the parsed data.
11
11
  *
@@ -5,7 +5,7 @@
5
5
  * task files. The generated files are ready-to-edit starting points —
6
6
  * not live evaluation tasks.
7
7
  *
8
- * TypeScript output (default) uses define* helpers from @sanity/ailf-core
8
+ * TypeScript output (default) uses define* helpers from @sanity/ailf
9
9
  * for full IDE autocomplete and type checking. YAML output serializes the
10
10
  * parsed task data. JSON output is a plain serialization of the parsed data.
11
11
  *
@@ -258,7 +258,7 @@ async function runInit(opts) {
258
258
  if (format === "ts") {
259
259
  console.log();
260
260
  console.log(` 💡 TypeScript tasks (${taskExt}) give you full IDE autocomplete`);
261
- console.log(" via defineTask() from @sanity/ailf-core.");
261
+ console.log(" via defineTask() from @sanity/ailf.");
262
262
  }
263
263
  console.log();
264
264
  console.log(" 🔑 Retrieve the API key from 1Password (Sanity employees):");
@@ -289,7 +289,7 @@ const CUSTOM_PRESET_TS = `/**
289
289
  * @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/presets.md
290
290
  */
291
291
 
292
- import { definePreset } from "../_vendor/ailf-core/index.js"
292
+ import { definePreset } from "@sanity/ailf"
293
293
 
294
294
  export default definePreset({
295
295
  name: "my-docs-evaluation",
@@ -191,12 +191,18 @@ export function computeResolvedOptions(opts) {
191
191
  // Smart default: full runs auto-publish when store is configured
192
192
  publishEnabled = reportStoreConfigured && !debugEnabled;
193
193
  }
194
+ // Resolve task source + repo tasks path before anything that depends on
195
+ // them (report store overrides, output dir). When --task-source=repo is
196
+ // set without --repo-tasks-path, default to ./.ailf/tasks/ — the location
197
+ // created by `ailf init`.
198
+ const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
199
+ const resolvedRepoTasksPath = resolveRepoTasksPath(callerCwd, opts.repoTasksPath, resolvedTaskSourceType);
194
200
  // Report store overrides — resolution order:
195
201
  // 1. Explicit CLI flags (--report-dataset, --report-project)
196
202
  // 2. Environment variables (AILF_REPORT_DATASET, AILF_REPORT_PROJECT_ID)
197
- // 3. .ailf/config.yaml reportStore block (when --repo-tasks-path is set)
203
+ // 3. .ailf/config.yaml reportStore block (when repo tasks path is set)
198
204
  // 4. Eval dataset override (so perspective evals publish to the same dataset)
199
- const repoConfig = loadRepoConfigIfPresent(opts.repoTasksPath);
205
+ const repoConfig = loadRepoConfigIfPresent(resolvedRepoTasksPath);
200
206
  const reportDataset = opts.reportDataset ??
201
207
  process.env.AILF_REPORT_DATASET ??
202
208
  repoConfig?.reportStore?.dataset ??
@@ -211,10 +217,6 @@ export function computeResolvedOptions(opts) {
211
217
  const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
212
218
  const apiKey = process.env.AILF_API_KEY ?? undefined;
213
219
  // Output directory: explicit --output-dir → $CWD/.ailf/results/latest/
214
- const resolvedRepoTasksPath = opts.repoTasksPath
215
- ? resolve(callerCwd, opts.repoTasksPath)
216
- : undefined;
217
- const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
218
220
  const outputDir = resolveOutputDir(opts.outputDir);
219
221
  return {
220
222
  allowedOriginArgs,
@@ -299,6 +301,39 @@ function resolveTaskSourceType(raw) {
299
301
  console.error(`❌ Invalid --task-source "${raw}". Must be "repo" or "content-lake".`);
300
302
  process.exit(1);
301
303
  }
304
+ /**
305
+ * Resolve the repo tasks path.
306
+ *
307
+ * - Explicit `--repo-tasks-path` wins (resolved relative to callerCwd).
308
+ * - When `--task-source=repo` is set without a path, defaults to
309
+ * `./.ailf/tasks/` in callerCwd — the location created by `ailf init`.
310
+ * - Otherwise returns undefined (Content Lake source).
311
+ *
312
+ * Exits with a helpful error when an explicit path doesn't exist, or when
313
+ * the repo source was requested but no tasks directory can be found.
314
+ */
315
+ function resolveRepoTasksPath(callerCwd, explicitPath, taskSourceType) {
316
+ if (explicitPath) {
317
+ const abs = resolve(callerCwd, explicitPath);
318
+ if (!existsSync(abs)) {
319
+ console.error(`❌ Repo tasks directory not found: ${abs}\n` +
320
+ " Provide a valid --repo-tasks-path, or run 'ailf init' to scaffold .ailf/tasks/.");
321
+ process.exit(1);
322
+ }
323
+ return abs;
324
+ }
325
+ if (taskSourceType === "repo") {
326
+ const defaultPath = resolve(callerCwd, ".ailf", "tasks");
327
+ if (!existsSync(defaultPath)) {
328
+ console.error(`❌ --task-source=repo was set but no tasks directory was found.\n` +
329
+ ` Looked for: ${defaultPath}\n` +
330
+ " Run 'ailf init' to scaffold .ailf/tasks/, or pass --repo-tasks-path <path>.");
331
+ process.exit(1);
332
+ }
333
+ return defaultPath;
334
+ }
335
+ return undefined;
336
+ }
302
337
  // ---------------------------------------------------------------------------
303
338
  // Pipeline entry point
304
339
  // ---------------------------------------------------------------------------
@@ -330,6 +365,16 @@ export async function executePipeline(cliOpts) {
330
365
  if (cliOpts.repoTasksPath) {
331
366
  config.repoTasksPath = resolve(callerCwd, cliOpts.repoTasksPath);
332
367
  }
368
+ else if (config.taskSourceType === "repo" && !config.repoTasksPath) {
369
+ // Default: when taskSource=repo but no path set, look in .ailf/tasks/
370
+ // (matches the `ailf init` scaffold location). Silent fallback here —
371
+ // composition root will surface a helpful error if the directory is
372
+ // missing.
373
+ const defaultPath = resolve(callerCwd, ".ailf", "tasks");
374
+ if (existsSync(defaultPath)) {
375
+ config.repoTasksPath = defaultPath;
376
+ }
377
+ }
333
378
  if (cliOpts.output) {
334
379
  config.outputPath = resolve(callerCwd, cliOpts.output);
335
380
  }
@@ -51,7 +51,7 @@ export function createPipelineCommand() {
51
51
  .option("--output-dir <path>", "Base directory for pipeline output artifacts (default: inferred from execution context)")
52
52
  .option("--promptfoo-url <url>", "Promptfoo share URL for report")
53
53
  .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge)", "content-lake")
54
- .option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
54
+ .option("--repo-tasks-path <path>", "Path to repo-based task definitions. Defaults to ./.ailf/tasks/ when --task-source=repo.")
55
55
  .option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
56
56
  .option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
57
57
  .option("--no-artifacts", "Disable all artifact writers (D0033). Overrides --artifacts-dir.")
@@ -1,9 +1,9 @@
1
1
  /**
2
2
  * validate-tasks command — standalone validation of task files.
3
3
  *
4
- * Validates .ailf/tasks/*.yaml files against the CanonicalTaskSchema without
5
- * running the full pipeline. Useful for pre-commit hooks and CI checks
6
- * in external repos.
4
+ * Validates .ailf/tasks/*.yaml and .ailf/tasks/*.task.ts files against the
5
+ * CanonicalTaskSchema without running the full pipeline. Useful for
6
+ * pre-commit hooks and CI checks in external repos.
7
7
  *
8
8
  * Usage:
9
9
  * ailf validate-tasks .ailf/tasks/
@@ -11,6 +11,17 @@
11
11
  *
12
12
  * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
13
13
  * @see packages/eval/src/adapters/task-sources/repo-validation.ts
14
+ * @see packages/eval/src/adapters/task-sources/task-file-loader.ts
14
15
  */
15
16
  import { Command } from "commander";
17
+ export interface ValidateTasksOptions {
18
+ strict: boolean;
19
+ callerCwd?: string;
20
+ }
16
21
  export declare function createValidateTasksCommand(): Command;
22
+ /**
23
+ * Execute the validate-tasks command logic. Returns the exit code (0 success,
24
+ * 1 failure) so callers can decide how to surface it — the CLI wrapper calls
25
+ * `process.exit`, tests can assert directly.
26
+ */
27
+ export declare function runValidateTasks(tasksPath: string, opts: ValidateTasksOptions): Promise<number>;
@@ -1,9 +1,9 @@
1
1
  /**
2
2
  * validate-tasks command — standalone validation of task files.
3
3
  *
4
- * Validates .ailf/tasks/*.yaml files against the CanonicalTaskSchema without
5
- * running the full pipeline. Useful for pre-commit hooks and CI checks
6
- * in external repos.
4
+ * Validates .ailf/tasks/*.yaml and .ailf/tasks/*.task.ts files against the
5
+ * CanonicalTaskSchema without running the full pipeline. Useful for
6
+ * pre-commit hooks and CI checks in external repos.
7
7
  *
8
8
  * Usage:
9
9
  * ailf validate-tasks .ailf/tasks/
@@ -11,97 +11,141 @@
11
11
  *
12
12
  * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
13
13
  * @see packages/eval/src/adapters/task-sources/repo-validation.ts
14
+ * @see packages/eval/src/adapters/task-sources/task-file-loader.ts
14
15
  */
15
16
  import { existsSync, readdirSync, readFileSync } from "fs";
16
- import { resolve, relative } from "path";
17
+ import { resolve, relative, basename } from "path";
17
18
  import { Command } from "commander";
18
19
  import { load } from "js-yaml";
19
20
  import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "../adapters/task-sources/repo-schemas.js";
20
21
  import { validateCanonicalTasks, formatValidationResult, } from "../adapters/task-sources/repo-validation.js";
22
+ import { discoverTsTaskFiles, loadTsTaskFile, } from "../adapters/task-sources/task-file-loader.js";
21
23
  export function createValidateTasksCommand() {
22
24
  return new Command("validate-tasks")
23
- .description("Validate task YAML files (.ailf/tasks/) against the canonical schema")
25
+ .description("Validate task files (YAML and TypeScript) in .ailf/tasks/ against the canonical schema")
24
26
  .argument("[path]", "Path to tasks directory (default: .ailf/tasks/)", ".ailf/tasks")
25
27
  .option("--strict", "Treat warnings as errors", false)
26
28
  .action(async (tasksPath, opts) => {
27
- // Resolve relative to the caller's working directory, not the
28
- // eval package root (which differs when run via bin/ailf.js)
29
- const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
30
- const resolvedPath = resolve(callerCwd, tasksPath);
31
- if (!existsSync(resolvedPath)) {
32
- console.error(`Directory not found: ${resolvedPath}`);
33
- process.exit(1);
29
+ const exitCode = await runValidateTasks(tasksPath, opts);
30
+ process.exit(exitCode);
31
+ });
32
+ }
33
+ /**
34
+ * Execute the validate-tasks command logic. Returns the exit code (0 success,
35
+ * 1 failure) so callers can decide how to surface it — the CLI wrapper calls
36
+ * `process.exit`, tests can assert directly.
37
+ */
38
+ export async function runValidateTasks(tasksPath, opts) {
39
+ // Resolve relative to the caller's working directory, not the
40
+ // eval package root (which differs when run via bin/ailf.js)
41
+ const callerCwd = opts.callerCwd ?? process.env.AILF_CALLER_CWD ?? process.cwd();
42
+ const resolvedPath = resolve(callerCwd, tasksPath);
43
+ if (!existsSync(resolvedPath)) {
44
+ console.error(`Directory not found: ${resolvedPath}`);
45
+ return 1;
46
+ }
47
+ const yamlFiles = readdirSync(resolvedPath).filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."));
48
+ const tsFiles = discoverTsTaskFiles(resolvedPath);
49
+ const fileCount = yamlFiles.length + tsFiles.length;
50
+ if (fileCount === 0) {
51
+ console.error(`No task files found in ${resolvedPath}\n` +
52
+ " Expected .yaml, .yml, .task.ts, or .task.js files");
53
+ return 1;
54
+ }
55
+ console.log(`\nValidating ${fileCount} task file(s) in ${relative(process.cwd(), resolvedPath)}/\n`);
56
+ let totalTasks = 0;
57
+ let hasErrors = false;
58
+ const allTasks = [];
59
+ for (const file of yamlFiles) {
60
+ const filePath = resolve(resolvedPath, file);
61
+ const raw = readFileSync(filePath, "utf-8");
62
+ let parsed;
63
+ try {
64
+ parsed = load(raw);
34
65
  }
35
- const yamlFiles = readdirSync(resolvedPath).filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."));
36
- if (yamlFiles.length === 0) {
37
- console.error(`No YAML files found in ${resolvedPath}`);
38
- process.exit(1);
66
+ catch (err) {
67
+ const msg = err instanceof Error ? err.message : String(err);
68
+ console.error(` ${file}: YAML parse error`);
69
+ console.error(` ${msg}\n`);
70
+ hasErrors = true;
71
+ continue;
39
72
  }
40
- console.log(`\nValidating ${yamlFiles.length} task file(s) in ${relative(process.cwd(), resolvedPath)}/\n`);
41
- let totalTasks = 0;
42
- let hasErrors = false;
43
- const allTasks = [];
44
- for (const file of yamlFiles) {
45
- const filePath = resolve(resolvedPath, file);
46
- const raw = readFileSync(filePath, "utf-8");
47
- let parsed;
48
- try {
49
- parsed = load(raw);
50
- }
51
- catch (err) {
52
- const msg = err instanceof Error ? err.message : String(err);
53
- console.error(` ${file}: YAML parse error`);
54
- console.error(` ${msg}\n`);
55
- hasErrors = true;
56
- continue;
57
- }
58
- if (!Array.isArray(parsed)) {
59
- console.error(` ${file}: Expected a YAML array of task definitions`);
60
- hasErrors = true;
61
- continue;
62
- }
63
- // Detect legacy field names before Zod validation
64
- const legacyWarnings = detectLegacyFieldNames(parsed, file);
65
- if (legacyWarnings.length > 0) {
66
- console.error(` ${file}: Uses legacy field names`);
67
- for (const w of legacyWarnings) {
68
- console.error(` ${w}`);
69
- }
70
- console.error();
71
- hasErrors = true;
72
- continue;
73
- }
74
- try {
75
- const tasks = parseCanonicalTaskFile(parsed, file);
76
- console.log(` ${file}: ${tasks.length} task${tasks.length === 1 ? "" : "s"} valid`);
77
- totalTasks += tasks.length;
78
- allTasks.push(...tasks);
79
- }
80
- catch (err) {
81
- const msg = err instanceof Error ? err.message : String(err);
82
- console.error(` ${file}: Schema validation failed`);
83
- console.error(`${msg
84
- .split("\n")
85
- .map((l) => ` ${l}`)
86
- .join("\n")}\n`);
87
- hasErrors = true;
88
- }
73
+ if (!Array.isArray(parsed)) {
74
+ console.error(` ${file}: Expected a YAML array of task definitions`);
75
+ hasErrors = true;
76
+ continue;
89
77
  }
90
- // Run semantic validation on all parsed tasks
91
- if (allTasks.length > 0) {
92
- console.log(); // blank line
93
- const semanticResult = validateCanonicalTasks(allTasks);
94
- const formatted = formatValidationResult(semanticResult);
95
- console.log(formatted);
96
- if (!semanticResult.valid) {
97
- hasErrors = true;
98
- }
99
- if (opts.strict && semanticResult.warnings.length > 0) {
100
- hasErrors = true;
101
- console.log("\n --strict mode: warnings treated as errors");
102
- }
78
+ if (!validateTaskArray(parsed, file, allTasks)) {
79
+ hasErrors = true;
80
+ continue;
103
81
  }
104
- console.log(`\n${hasErrors ? "FAIL" : "OK"} ${totalTasks} task${totalTasks === 1 ? "" : "s"} across ${yamlFiles.length} file${yamlFiles.length === 1 ? "" : "s"}\n`);
105
- process.exit(hasErrors ? 1 : 0);
106
- });
82
+ totalTasks += parsed.length;
83
+ }
84
+ for (const tsFilePath of tsFiles) {
85
+ const file = basename(tsFilePath);
86
+ let loaded;
87
+ try {
88
+ loaded = await loadTsTaskFile(tsFilePath);
89
+ }
90
+ catch (err) {
91
+ const msg = err instanceof Error ? err.message : String(err);
92
+ console.error(` ${file}: Failed to load TypeScript task file`);
93
+ console.error(` ${msg}\n`);
94
+ hasErrors = true;
95
+ continue;
96
+ }
97
+ if (!validateTaskArray(loaded.tasks, file, allTasks)) {
98
+ hasErrors = true;
99
+ continue;
100
+ }
101
+ totalTasks += loaded.tasks.length;
102
+ }
103
+ if (allTasks.length > 0) {
104
+ console.log();
105
+ const semanticResult = validateCanonicalTasks(allTasks);
106
+ const formatted = formatValidationResult(semanticResult);
107
+ console.log(formatted);
108
+ if (!semanticResult.valid) {
109
+ hasErrors = true;
110
+ }
111
+ if (opts.strict && semanticResult.warnings.length > 0) {
112
+ hasErrors = true;
113
+ console.log("\n --strict mode: warnings treated as errors");
114
+ }
115
+ }
116
+ console.log(`\n${hasErrors ? "FAIL" : "OK"} ${totalTasks} task${totalTasks === 1 ? "" : "s"} across ${fileCount} file${fileCount === 1 ? "" : "s"}\n`);
117
+ return hasErrors ? 1 : 0;
118
+ }
119
+ /**
120
+ * Validate an array of raw task entries — runs the legacy-field detector and
121
+ * the canonical Zod schema, appending valid tasks to `accumulator`.
122
+ *
123
+ * Returns `true` when the file is fully valid, `false` when any error was
124
+ * reported (the caller is responsible for flipping its own error flag).
125
+ */
126
+ function validateTaskArray(entries, file, accumulator) {
127
+ const legacyWarnings = detectLegacyFieldNames(entries, file);
128
+ if (legacyWarnings.length > 0) {
129
+ console.error(` ${file}: Uses legacy field names`);
130
+ for (const w of legacyWarnings) {
131
+ console.error(` ${w}`);
132
+ }
133
+ console.error();
134
+ return false;
135
+ }
136
+ try {
137
+ const tasks = parseCanonicalTaskFile(entries, file);
138
+ console.log(` ${file}: ${tasks.length} task${tasks.length === 1 ? "" : "s"} valid`);
139
+ accumulator.push(...tasks);
140
+ return true;
141
+ }
142
+ catch (err) {
143
+ const msg = err instanceof Error ? err.message : String(err);
144
+ console.error(` ${file}: Schema validation failed`);
145
+ console.error(`${msg
146
+ .split("\n")
147
+ .map((l) => ` ${l}`)
148
+ .join("\n")}\n`);
149
+ return false;
150
+ }
107
151
  }
@@ -188,14 +188,19 @@ export function createArtifactWriter(config, logger, progress) {
188
188
  exclude,
189
189
  ...(remote ? {} : { progress }),
190
190
  });
191
+ // W0064 — when a remote backend is wired, list it first so its ArtifactRef
192
+ // wins the fanout's firstNonNull() selection and the published manifest
193
+ // points at a cross-machine-readable store. Local stays attached as the
194
+ // resilience tier: if the remote leg fails, firstNonNull falls through to
195
+ // local and the pipeline still produces a non-null ref.
191
196
  const base = remote
192
- ? new FanoutArtifactWriter([local, remote], { progress })
197
+ ? new FanoutArtifactWriter([remote, local], { progress })
193
198
  : local;
194
199
  if (!remote) {
195
200
  logger.debug(`Artifact writer: LocalFilesystemArtifactWriter only (rootDir=${rootDir})`);
196
201
  }
197
202
  else {
198
- logger.debug(`Artifact writer: FanoutArtifactWriter([local=${rootDir}, ${remote.constructor.name}])`);
203
+ logger.debug(`Artifact writer: FanoutArtifactWriter([${remote.constructor.name}, local=${rootDir}])`);
199
204
  }
200
205
  // Wrap in the accumulator so FinalizeRunStep can build a populated
201
206
  // RunManifest without each producer bookkeeping its own ArtifactRefs
package/dist/index.d.ts CHANGED
@@ -39,3 +39,5 @@ export { env } from "./_vendor/ailf-core/index.d.ts";
39
39
  export type { AgentHarnessTaskDefinition, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./_vendor/ailf-core/index.d.ts";
40
40
  export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, type CanonicalTask, type CuratedAssertionType, type RubricTemplateName, } from "./adapters/task-sources/repo-schemas.js";
41
41
  export { formatValidationResult, validateCanonicalTasks, type ValidationMessage, type ValidationResult, } from "./adapters/task-sources/repo-validation.js";
42
+ export { InMemoryPluginRegistry } from "./_vendor/ailf-core/index.d.ts";
43
+ export type { CompilationContext, ModeBase, ModeCompileResult, ModeHandler, PresetDefinition, } from "./_vendor/ailf-core/index.d.ts";
package/dist/index.js CHANGED
@@ -46,3 +46,7 @@ export { env } from "./_vendor/ailf-core/index.js";
46
46
  // ---------------------------------------------------------------------------
47
47
  export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, } from "./adapters/task-sources/repo-schemas.js";
48
48
  export { formatValidationResult, validateCanonicalTasks, } from "./adapters/task-sources/repo-validation.js";
49
+ // ---------------------------------------------------------------------------
50
+ // Plugin extension points — for authoring custom presets, modes, and registries
51
+ // ---------------------------------------------------------------------------
52
+ export { InMemoryPluginRegistry } from "./_vendor/ailf-core/index.js";
@@ -130,6 +130,12 @@ export async function orchestratePipeline(ctx, steps) {
130
130
  const pipelineStart = Date.now();
131
131
  const hasJob = !!ctx.config.jobId;
132
132
  const jobUpdates = [];
133
+ // DOC-2064 — tracks whether the pre-finalize pipelineContext emit fired so
134
+ // the post-loop fallback can skip redundant writes. A second emit to the
135
+ // same GCS path produces a 412 Precondition Failed from the signed-URL
136
+ // writer (which enforces no-overwrite), logging spurious warnings on every
137
+ // successful run.
138
+ let pipelineContextEmitted = false;
133
139
  ctx.logger.section("ai-literacy-framework — Evaluation Pipeline");
134
140
  ctx.logger.debug(`Pipeline starting with ${steps.length} steps`, {
135
141
  steps: steps.map((s) => s.name),
@@ -152,6 +158,16 @@ export async function orchestratePipeline(ctx, steps) {
152
158
  ctx.logger.debug(`Starting step ${i + 1}/${steps.length}: ${step.name}`);
153
159
  ctx.logger.section(step.name);
154
160
  exportPhase.maybeOpen(step.name);
161
+ // DOC-2064 — emit pipelineContext BEFORE finalize-run so the artifact
162
+ // ref registers with the accumulator and lands in RunManifest.artifacts,
163
+ // which PublishReportStep then snapshots into Report.artifactManifest.
164
+ // The previous post-loop emit ran after publish and was invisible to
165
+ // Content Lake readers. The failure-path capture below still fires on
166
+ // pre-finalize aborts so aborted runs retain the on-disk artifact.
167
+ if (step.name === "finalize-run") {
168
+ await capturePipelineContext(ctx, state, results);
169
+ pipelineContextEmitted = true;
170
+ }
155
171
  // Report current step progress
156
172
  if (hasJob) {
157
173
  await reportJobProgress(ctx, step.name, i, steps.length, "running", undefined, jobUpdates);
@@ -175,8 +191,12 @@ export async function orchestratePipeline(ctx, steps) {
175
191
  }
176
192
  // Capture pipeline context before exiting. `job-updates` was an
177
193
  // observability-only capture not tied to a registered artifact type;
178
- // dropped in W0050. Use the JobStore path for job telemetry.
179
- await capturePipelineContext(ctx, state, results);
194
+ // dropped in W0050. Use the JobStore path for job telemetry. Skip
195
+ // when the pre-finalize emit already fired to avoid a 412 overwrite
196
+ // warning (DOC-2064).
197
+ if (!pipelineContextEmitted) {
198
+ await capturePipelineContext(ctx, state, results);
199
+ }
180
200
  exportPhase.close();
181
201
  return {
182
202
  belowCritical: state.belowCritical,
@@ -231,9 +251,18 @@ export async function orchestratePipeline(ctx, steps) {
231
251
  ctx.logger.warn("Failed to report job completion — continuing");
232
252
  }
233
253
  }
234
- // Capture pipeline context. `job-updates` observability captures were
235
- // dropped in Slice 6.1 — JobStore is the supported telemetry path.
236
- await capturePipelineContext(ctx, state, results);
254
+ // DOC-2064 — post-loop fallback. Only fires when the pre-finalize emit
255
+ // inside the step loop didn't run — typically because the pipeline has no
256
+ // finalize-run step (test harnesses, air-gapped runs). Skipping this when
257
+ // the pre-finalize emit already fired avoids a 412 Precondition Failed
258
+ // from the signed-URL writer, which refuses to overwrite the existing
259
+ // path. The tradeoff is that pipelineContext captures pipeline state as
260
+ // of finalize-run, not post-publish — reportId is absent. Acceptable
261
+ // because runId is the primary join key and reportId is trivially
262
+ // looked up from Content Lake via runId.
263
+ if (!pipelineContextEmitted) {
264
+ await capturePipelineContext(ctx, state, results);
265
+ }
237
266
  exportPhase.close();
238
267
  return {
239
268
  belowCritical: state.belowCritical,
@@ -47,6 +47,12 @@ export interface RawTestResult {
47
47
  };
48
48
  response: {
49
49
  output: string;
50
+ tokenUsage?: {
51
+ cached?: number;
52
+ completion?: number;
53
+ prompt?: number;
54
+ total?: number;
55
+ };
50
56
  };
51
57
  testCase?: {
52
58
  description?: string;
@@ -223,6 +223,7 @@ export function extractStoredTestResults(resultsPath) {
223
223
  }
224
224
  dimensions.push({ dimension, reason, score });
225
225
  }
226
+ const tokenUsage = result.response?.tokenUsage;
226
227
  testResults.push({
227
228
  area,
228
229
  cost: result.cost || undefined,
@@ -233,6 +234,7 @@ export function extractStoredTestResults(resultsPath) {
233
234
  responseOutput,
234
235
  ...(responseOutputTruncated && { responseOutputTruncated: true }),
235
236
  taskId,
237
+ ...(tokenUsage && { tokenUsage }),
236
238
  variant,
237
239
  });
238
240
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "3.1.1",
3
+ "version": "3.3.0",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"