@sanity/ailf 3.2.0 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,55 @@
1
+ /**
2
+ * ailf-resolver.ts — locate `@sanity/ailf` for user TS files, with a bundled fallback.
3
+ *
4
+ * User `.ailf/*.ts` files import `defineTask` / `defineConfig` / `definePreset`
5
+ * from `@sanity/ailf`. In a fresh project with no local install, that bare
6
+ * specifier cannot resolve from the user's tree. To keep `ailf init` → `ailf
7
+ * pipeline` working out of the box we transparently fall back to the CLI's own
8
+ * copy of `@sanity/ailf` by registering a jiti module alias. A user-local
9
+ * install always wins — the fallback kicks in only when resolution fails.
10
+ *
11
+ * All jiti callsites across the eval package use `resolveAilfAlias()` to get
12
+ * a consistent resolution + warning story. Callers pass the returned map (or
13
+ * nothing) to `createJiti`.
14
+ */
15
+ /**
16
+ * Probe whether the user has `@sanity/ailf` installed as a local dependency
17
+ * reachable from the given path. Walks up the directory tree looking for a
18
+ * `node_modules/@sanity/ailf/package.json`. Returns the package entry point
19
+ * path on success, null otherwise.
20
+ *
21
+ * We intentionally do NOT use Node's `require.resolve` self-reference path:
22
+ * tsx and some bundler setups make it unreliable, and a self-reference
23
+ * would only match when the caller *is* the `@sanity/ailf` package (the
24
+ * monorepo devving case), which is semantically the same as having no
25
+ * install — the bundled fallback handles it.
26
+ */
27
+ export declare function probeUserLocalAilf(fromPath: string): string | null;
28
+ /**
29
+ * Return the path to the CLI's own bundled copy of `@sanity/ailf`. Used as the
30
+ * fallback target when a user's project does not have it installed.
31
+ *
32
+ * We walk the filesystem rather than `require.resolve("@sanity/ailf")` because
33
+ * self-reference resolution is unreliable under tsx and some bundler setups.
34
+ * Returns null in exotic setups where no ancestor package.json matches.
35
+ */
36
+ export declare function getBundledAilfPath(): string | null;
37
+ /**
38
+ * Emit a one-shot stderr advisory when the loader falls back to the bundled
39
+ * `@sanity/ailf`. The flag is module-scoped so a single pipeline run warns at
40
+ * most once, no matter how many TS files trigger the fallback.
41
+ */
42
+ export declare function warnBundledFallbackOnce(): void;
43
+ /** Test-only: reset the warn-once flag between unit tests. */
44
+ export declare function resetBundledFallbackWarning(): void;
45
+ /**
46
+ * Decide whether jiti should alias `@sanity/ailf` → bundled-path for the given
47
+ * file. Returns the alias map or null.
48
+ *
49
+ * - User-local resolves → returns null (jiti's natural walk finds it).
50
+ * - User-local fails + bundled path available → returns alias map, fires
51
+ * one-shot warning, logs at verbose level.
52
+ * - User-local fails + no bundled path → returns null (nothing we can do;
53
+ * jiti will surface the original MODULE_NOT_FOUND).
54
+ */
55
+ export declare function resolveAilfAlias(filePath: string): Record<string, string> | null;
@@ -0,0 +1,147 @@
1
+ /**
2
+ * ailf-resolver.ts — locate `@sanity/ailf` for user TS files, with a bundled fallback.
3
+ *
4
+ * User `.ailf/*.ts` files import `defineTask` / `defineConfig` / `definePreset`
5
+ * from `@sanity/ailf`. In a fresh project with no local install, that bare
6
+ * specifier cannot resolve from the user's tree. To keep `ailf init` → `ailf
7
+ * pipeline` working out of the box we transparently fall back to the CLI's own
8
+ * copy of `@sanity/ailf` by registering a jiti module alias. A user-local
9
+ * install always wins — the fallback kicks in only when resolution fails.
10
+ *
11
+ * All jiti callsites across the eval package use `resolveAilfAlias()` to get
12
+ * a consistent resolution + warning story. Callers pass the returned map (or
13
+ * nothing) to `createJiti`.
14
+ */
15
+ import { existsSync, readFileSync, statSync } from "node:fs";
16
+ import { dirname, resolve as pathResolve } from "node:path";
17
+ import { fileURLToPath } from "node:url";
18
/**
 * Locate the root directory of the `@sanity/ailf` package that contains this
 * module.
 *
 * The search starts in this module's own directory (derived from
 * `import.meta.url`) and climbs one parent at a time. The first directory
 * holding a `package.json` whose `name` is `"@sanity/ailf"` is returned.
 * Manifests that cannot be read or parsed are skipped rather than treated as
 * errors. The filesystem root itself is not inspected.
 *
 * @returns The package root directory, or `null` when no ancestor matches.
 */
function findAilfPackageRoot() {
    let current = dirname(fileURLToPath(import.meta.url));
    for (;;) {
        const parent = dirname(current);
        // `dirname` is a fixed point at the filesystem root: stop there.
        if (parent === current) {
            return null;
        }
        const manifest = pathResolve(current, "package.json");
        if (existsSync(manifest)) {
            let packageName;
            try {
                packageName = JSON.parse(readFileSync(manifest, "utf-8")).name;
            }
            catch {
                // Unreadable or malformed manifest: keep climbing.
                packageName = undefined;
            }
            if (packageName === "@sanity/ailf") {
                return current;
            }
        }
        current = parent;
    }
}
42
/**
 * Probe whether `@sanity/ailf` is installed as a local dependency reachable
 * from `fromPath`.
 *
 * Starting at `fromPath` (when it is an existing directory) or at its parent
 * directory (otherwise), every level up to and including the filesystem root
 * is checked for `node_modules/@sanity/ailf/package.json`. The first manifest
 * found decides the result: its `module` field, else `main`, else
 * `"index.js"`, resolved against the package directory.
 *
 * We intentionally do NOT use Node's `require.resolve` self-reference path:
 * tsx and some bundler setups make it unreliable, and a self-reference would
 * only match when the caller *is* the `@sanity/ailf` package, which is
 * semantically the same as having no install — the bundled fallback handles
 * it.
 *
 * @param fromPath File or directory to start from. May be relative; relative
 *   paths are resolved against the current working directory.
 * @returns Absolute path of the package entry point, or `null` when no
 *   install is found or the nearest manifest cannot be read or parsed.
 */
export function probeUserLocalAilf(fromPath) {
    // Anchor relative inputs first. `dirname()` of a relative path bottoms out
    // at "." (its own fixed point), which would end the walk before the
    // working directory or any of its ancestors had been probed.
    const start = pathResolve(fromPath);
    let dir;
    try {
        dir = statSync(start).isDirectory() ? start : dirname(start);
    }
    catch {
        // Missing or un-stat-able path: treat it as a (future) file path.
        dir = dirname(start);
    }
    for (;;) {
        const pkgJson = pathResolve(dir, "node_modules", "@sanity", "ailf", "package.json");
        if (existsSync(pkgJson)) {
            try {
                const pkg = JSON.parse(readFileSync(pkgJson, "utf-8"));
                const entry = pkg.module ?? pkg.main ?? "index.js";
                return pathResolve(dirname(pkgJson), entry);
            }
            catch {
                // Nearest install is unreadable/malformed: report "not found"
                // so the caller can fall back to the bundled copy.
                return null;
            }
        }
        const parent = dirname(dir);
        // The root is probed above before we stop, matching Node's own
        // node_modules lookup, which also visits `/node_modules`.
        if (parent === dir) {
            return null;
        }
        dir = parent;
    }
}
81
/**
 * Resolve the entry file of the CLI's own copy of `@sanity/ailf`, used as the
 * alias target when the user's project has no local install.
 *
 * The package root comes from `findAilfPackageRoot()` (a filesystem walk
 * rather than `require.resolve("@sanity/ailf")`, because self-reference
 * resolution is unreliable under tsx and some bundler setups). Two layouts are
 * tried in order: the compiled `dist/index.js`, then the source
 * `src/index.ts`.
 *
 * @returns The first entry file that exists, or `null` when the package root
 *   cannot be found or neither entry file is present.
 */
export function getBundledAilfPath() {
    const root = findAilfPackageRoot();
    if (!root) {
        return null;
    }
    const candidates = [
        // Production layout: packages/eval/dist/index.js
        pathResolve(root, "dist", "index.js"),
        // Development layout (tsx on source): packages/eval/src/index.ts
        pathResolve(root, "src", "index.ts"),
    ];
    return candidates.find((candidate) => existsSync(candidate)) ?? null;
}
103
+ let hasWarnedOnce = false;
104
+ /**
105
+ * Emit a one-shot stderr advisory when the loader falls back to the bundled
106
+ * `@sanity/ailf`. The flag is module-scoped so a single pipeline run warns at
107
+ * most once, no matter how many TS files trigger the fallback.
108
+ */
109
+ export function warnBundledFallbackOnce() {
110
+ if (hasWarnedOnce)
111
+ return;
112
+ hasWarnedOnce = true;
113
+ process.stderr.write(" ⚠ @sanity/ailf is not installed in your project — using the CLI's bundled copy.\n" +
114
+ " Pin it locally for reproducibility: npm install -D @sanity/ailf\n");
115
+ }
116
+ /** Test-only: reset the warn-once flag between unit tests. */
117
+ export function resetBundledFallbackWarning() {
118
+ hasWarnedOnce = false;
119
+ }
120
/**
 * Work out the jiti alias map needed to load `filePath`.
 *
 * - A user-local `@sanity/ailf` is found → `null`; no alias is registered.
 * - No local install, bundled copy available → `{ "@sanity/ailf": <bundled> }`,
 *   and the one-shot fallback warning fires.
 * - No local install and no bundled copy → `null`.
 *
 * When `AILF_LOG_LEVEL` is `"verbose"`, the decision for the first two cases
 * is also written to stderr.
 *
 * @param filePath Path of the user file about to be loaded.
 * @returns The alias map to hand to `createJiti`, or `null` when none applies.
 */
export function resolveAilfAlias(filePath) {
    const isVerbose = process.env.AILF_LOG_LEVEL === "verbose";
    const localEntry = probeUserLocalAilf(filePath);
    if (localEntry) {
        if (isVerbose) {
            process.stderr.write(` [ts-loader] ${filePath} → @sanity/ailf resolved locally at ${localEntry}\n`);
        }
        return null;
    }
    const bundledEntry = getBundledAilfPath();
    if (bundledEntry) {
        if (isVerbose) {
            process.stderr.write(` [ts-loader] ${filePath} → @sanity/ailf not installed locally; using bundled copy at ${bundledEntry}\n`);
        }
        warnBundledFallbackOnce();
        return { "@sanity/ailf": bundledEntry };
    }
    return null;
}
@@ -15,6 +15,7 @@
15
15
  import { existsSync } from "fs";
16
16
  import { pathToFileURL } from "node:url";
17
17
  import { createJiti } from "jiti";
18
+ import { resolveAilfAlias } from "./ailf-resolver.js";
18
19
  // ---------------------------------------------------------------------------
19
20
  // jiti instance factory — resolves imports relative to the loaded file
20
21
  // ---------------------------------------------------------------------------
@@ -28,13 +29,19 @@ import { createJiti } from "jiti";
28
29
  *
29
30
  * We pass a `file://` URL (not a bare path) so jiti uses ESM resolution,
30
31
  * which matches the `"import"` condition in package.json exports maps.
32
+ *
33
+ * When the user's project cannot resolve `@sanity/ailf` (fresh directory
34
+ * without a local install), we register an alias pointing at the CLI's own
35
+ * bundled copy so the load still succeeds. See `ailf-resolver.ts`.
31
36
  */
32
37
  function createJitiForFile(filePath) {
38
+ const alias = resolveAilfAlias(filePath);
33
39
  return createJiti(pathToFileURL(filePath).href, {
34
40
  // Interop: handle both `export default` and `module.exports`
35
41
  interopDefault: true,
36
42
  // Don't require file extensions in imports
37
43
  requireCache: true,
44
+ ...(alias ? { alias } : {}),
38
45
  });
39
46
  }
40
47
  /**
@@ -19,15 +19,17 @@ import { z } from "zod";
19
19
  /**
20
20
  * The set of assertion types allowed in task files.
21
21
  *
22
- * This is a curated subset of Promptfoo assertion types we expose only the
23
- * types that are stable, well-documented, and useful for external authors.
22
+ * Combines a curated subset of Promptfoo assertion types (stable, well-
23
+ * documented, useful for external authors) with the agent-harness-specific
24
+ * types mapped by `mode-handlers/agent-harness/assertions.ts`.
24
25
  */
25
- export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency"];
26
+ export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency", "file-exists", "file-contains", "command-succeeds", "diff-matches"];
26
27
  export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
27
28
  /**
28
- * Valid rubric template names — must match keys in config/rubrics.yaml.
29
+ * Valid rubric template names — must match template keys in
30
+ * `packages/eval/config/rubrics.ts`.
29
31
  */
30
- export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage"];
32
+ export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage", "mcp-input-validation", "mcp-output-correctness", "mcp-error-handling", "mcp-security", "factual-correctness", "completeness", "currency", "process-quality", "agent-output", "agent-tool-usage"];
31
33
  export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
32
34
  /**
33
35
  * Zod schema for a single task definition using canonical field names.
@@ -84,6 +86,16 @@ export declare const CanonicalTaskSchema: z.ZodObject<{
84
86
  "task-completion": "task-completion";
85
87
  "code-correctness": "code-correctness";
86
88
  "doc-coverage": "doc-coverage";
89
+ "mcp-input-validation": "mcp-input-validation";
90
+ "mcp-output-correctness": "mcp-output-correctness";
91
+ "mcp-error-handling": "mcp-error-handling";
92
+ "mcp-security": "mcp-security";
93
+ "factual-correctness": "factual-correctness";
94
+ completeness: "completeness";
95
+ currency: "currency";
96
+ "process-quality": "process-quality";
97
+ "agent-output": "agent-output";
98
+ "agent-tool-usage": "agent-tool-usage";
87
99
  }>;
88
100
  criteria: z.ZodArray<z.ZodString>;
89
101
  weight: z.ZodOptional<z.ZodNumber>;
@@ -101,6 +113,10 @@ export declare const CanonicalTaskSchema: z.ZodObject<{
101
113
  similar: "similar";
102
114
  cost: "cost";
103
115
  latency: "latency";
116
+ "file-exists": "file-exists";
117
+ "file-contains": "file-contains";
118
+ "command-succeeds": "command-succeeds";
119
+ "diff-matches": "diff-matches";
104
120
  }>;
105
121
  value: z.ZodOptional<z.ZodUnknown>;
106
122
  threshold: z.ZodOptional<z.ZodNumber>;
@@ -174,6 +190,16 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodObject<{
174
190
  "task-completion": "task-completion";
175
191
  "code-correctness": "code-correctness";
176
192
  "doc-coverage": "doc-coverage";
193
+ "mcp-input-validation": "mcp-input-validation";
194
+ "mcp-output-correctness": "mcp-output-correctness";
195
+ "mcp-error-handling": "mcp-error-handling";
196
+ "mcp-security": "mcp-security";
197
+ "factual-correctness": "factual-correctness";
198
+ completeness: "completeness";
199
+ currency: "currency";
200
+ "process-quality": "process-quality";
201
+ "agent-output": "agent-output";
202
+ "agent-tool-usage": "agent-tool-usage";
177
203
  }>;
178
204
  criteria: z.ZodArray<z.ZodString>;
179
205
  weight: z.ZodOptional<z.ZodNumber>;
@@ -191,6 +217,10 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodObject<{
191
217
  similar: "similar";
192
218
  cost: "cost";
193
219
  latency: "latency";
220
+ "file-exists": "file-exists";
221
+ "file-contains": "file-contains";
222
+ "command-succeeds": "command-succeeds";
223
+ "diff-matches": "diff-matches";
194
224
  }>;
195
225
  value: z.ZodOptional<z.ZodUnknown>;
196
226
  threshold: z.ZodOptional<z.ZodNumber>;
@@ -22,8 +22,9 @@ import { z } from "zod";
22
22
  /**
23
23
  * The set of assertion types allowed in task files.
24
24
  *
25
- * This is a curated subset of Promptfoo assertion types we expose only the
26
- * types that are stable, well-documented, and useful for external authors.
25
+ * Combines a curated subset of Promptfoo assertion types (stable, well-
26
+ * documented, useful for external authors) with the agent-harness-specific
27
+ * types mapped by `mode-handlers/agent-harness/assertions.ts`.
27
28
  */
28
29
  export const CURATED_ASSERTION_TYPES = [
29
30
  "llm-rubric",
@@ -38,14 +39,35 @@ export const CURATED_ASSERTION_TYPES = [
38
39
  "similar",
39
40
  "cost",
40
41
  "latency",
42
+ // Agent-harness assertions — verify sandbox state after the agent runs.
43
+ // See src/pipeline/compiler/mode-handlers/agent-harness/assertions.ts
44
+ "file-exists",
45
+ "file-contains",
46
+ "command-succeeds",
47
+ "diff-matches",
41
48
  ];
42
49
  /**
43
- * Valid rubric template names — must match keys in config/rubrics.yaml.
50
+ * Valid rubric template names — must match template keys in
51
+ * `packages/eval/config/rubrics.ts`.
44
52
  */
45
53
  export const RUBRIC_TEMPLATE_NAMES = [
54
+ // Core literacy dimensions
46
55
  "task-completion",
47
56
  "code-correctness",
48
57
  "doc-coverage",
58
+ // MCP server dimensions
59
+ "mcp-input-validation",
60
+ "mcp-output-correctness",
61
+ "mcp-error-handling",
62
+ "mcp-security",
63
+ // Knowledge probe dimensions
64
+ "factual-correctness",
65
+ "completeness",
66
+ "currency",
67
+ // Agent harness dimensions
68
+ "process-quality",
69
+ "agent-output",
70
+ "agent-tool-usage",
49
71
  ];
50
72
  // ---------------------------------------------------------------------------
51
73
  // Doc ref schemas — polymorphic canonical doc references
@@ -25,6 +25,7 @@ import { existsSync, readdirSync } from "fs";
25
25
  import { pathToFileURL } from "node:url";
26
26
  import { resolve } from "path";
27
27
  import { createJiti } from "jiti";
28
+ import { resolveAilfAlias } from "../config-sources/ailf-resolver.js";
28
29
  import { loadTsConfig } from "../config-sources/ts-config-loader.js";
29
30
  /**
30
31
  * Discover TS/JS task files in a directory.
@@ -72,9 +73,11 @@ export async function loadTsTaskFile(filePath) {
72
73
  * Needed by resolve-mappings.ts which is called from sync contexts.
73
74
  */
74
75
  export function loadTsTaskFileSync(filePath) {
76
+ const alias = resolveAilfAlias(filePath);
75
77
  const jiti = createJiti(pathToFileURL(filePath).href, {
76
78
  interopDefault: true,
77
79
  requireCache: true,
80
+ ...(alias ? { alias } : {}),
78
81
  });
79
82
  const mod = jiti(filePath);
80
83
  const value = mod && typeof mod === "object" && "default" in mod ? mod.default : mod;
@@ -5,7 +5,7 @@
5
5
  * task files. The generated files are ready-to-edit starting points —
6
6
  * not live evaluation tasks.
7
7
  *
8
- * TypeScript output (default) uses define* helpers from @sanity/ailf-core
8
+ * TypeScript output (default) uses define* helpers from @sanity/ailf
9
9
  * for full IDE autocomplete and type checking. YAML output serializes the
10
10
  * parsed task data. JSON output is a plain serialization of the parsed data.
11
11
  *
@@ -5,7 +5,7 @@
5
5
  * task files. The generated files are ready-to-edit starting points —
6
6
  * not live evaluation tasks.
7
7
  *
8
- * TypeScript output (default) uses define* helpers from @sanity/ailf-core
8
+ * TypeScript output (default) uses define* helpers from @sanity/ailf
9
9
  * for full IDE autocomplete and type checking. YAML output serializes the
10
10
  * parsed task data. JSON output is a plain serialization of the parsed data.
11
11
  *
@@ -20,6 +20,7 @@ import { Command } from "commander";
20
20
  import { existsSync, mkdirSync, writeFileSync } from "fs";
21
21
  import { resolve, relative } from "path";
22
22
  import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES, TASK_EXAMPLES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
23
+ import { probeUserLocalAilf } from "../adapters/config-sources/ailf-resolver.js";
23
24
  // ---------------------------------------------------------------------------
24
25
  // Command factory
25
26
  // ---------------------------------------------------------------------------
@@ -82,6 +83,13 @@ async function runInit(opts) {
82
83
  console.log();
83
84
  console.log(" 🚀 Initializing AI Literacy Framework");
84
85
  console.log();
86
+ if (format === "ts" && !probeUserLocalAilf(targetDir)) {
87
+ console.log(" ℹ @sanity/ailf is not installed in this project yet.");
88
+ console.log(" For reproducibility and IDE autocomplete, install it after init:");
89
+ console.log(" npm install -D @sanity/ailf (or pnpm add -D, yarn add -D)");
90
+ console.log(" The pipeline will fall back to the CLI's bundled copy until you do.");
91
+ console.log();
92
+ }
85
93
  // 1. Create directories
86
94
  mkdirSync(tasksDir, { recursive: true });
87
95
  console.log(` ✓ Created ${rel(targetDir, ailfDir)}/`);
@@ -252,13 +260,13 @@ async function runInit(opts) {
252
260
  console.log(` 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/`);
253
261
  console.log(" 3. Add a GitHub Actions secret");
254
262
  console.log(" (Settings → Secrets and variables → Actions):");
255
- console.log(" • AILF_API_KEY — your API key (starts with ailf_live_sk_)");
263
+ console.log(" • AILF_API_KEY — your API key");
256
264
  console.log(" 4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
257
265
  console.log(" automatically on PRs");
258
266
  if (format === "ts") {
259
267
  console.log();
260
268
  console.log(` 💡 TypeScript tasks (${taskExt}) give you full IDE autocomplete`);
261
- console.log(" via defineTask() from @sanity/ailf-core.");
269
+ console.log(" via defineTask() from @sanity/ailf.");
262
270
  }
263
271
  console.log();
264
272
  console.log(" 🔑 Retrieve the API key from 1Password (Sanity employees):");
@@ -268,9 +276,15 @@ async function runInit(opts) {
268
276
  console.log();
269
277
  console.log(" Not a Sanity employee? Request an API key from the AILF team.");
270
278
  console.log();
271
- console.log(" 💡 Test locally before pushing:");
279
+ console.log(" 💡 Test a remote run (executes against the AILF API) before pushing:");
272
280
  console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
273
281
  console.log();
282
+ console.log(" 💡 Or test a remote run against your repo tasks:");
283
+ console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --task-source=repo --debug");
284
+ console.log();
285
+ console.log(" 💡 Or run locally against your repo tasks:");
286
+ console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --mode=literacy --variant=full --task-source=repo --debug --explain -y");
287
+ console.log();
274
288
  }
275
289
  // ---------------------------------------------------------------------------
276
290
  // Custom preset scaffold template
@@ -289,7 +303,7 @@ const CUSTOM_PRESET_TS = `/**
289
303
  * @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/presets.md
290
304
  */
291
305
 
292
- import { definePreset } from "../_vendor/ailf-core/index.js"
306
+ import { definePreset } from "@sanity/ailf"
293
307
 
294
308
  export default definePreset({
295
309
  name: "my-docs-evaluation",
@@ -191,12 +191,18 @@ export function computeResolvedOptions(opts) {
191
191
  // Smart default: full runs auto-publish when store is configured
192
192
  publishEnabled = reportStoreConfigured && !debugEnabled;
193
193
  }
194
+ // Resolve task source + repo tasks path before anything that depends on
195
+ // them (report store overrides, output dir). When --task-source=repo is
196
+ // set without --repo-tasks-path, default to ./.ailf/tasks/ — the location
197
+ // created by `ailf init`.
198
+ const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
199
+ const resolvedRepoTasksPath = resolveRepoTasksPath(callerCwd, opts.repoTasksPath, resolvedTaskSourceType);
194
200
  // Report store overrides — resolution order:
195
201
  // 1. Explicit CLI flags (--report-dataset, --report-project)
196
202
  // 2. Environment variables (AILF_REPORT_DATASET, AILF_REPORT_PROJECT_ID)
197
- // 3. .ailf/config.yaml reportStore block (when --repo-tasks-path is set)
203
+ // 3. .ailf/config.yaml reportStore block (when repo tasks path is set)
198
204
  // 4. Eval dataset override (so perspective evals publish to the same dataset)
199
- const repoConfig = loadRepoConfigIfPresent(opts.repoTasksPath);
205
+ const repoConfig = loadRepoConfigIfPresent(resolvedRepoTasksPath);
200
206
  const reportDataset = opts.reportDataset ??
201
207
  process.env.AILF_REPORT_DATASET ??
202
208
  repoConfig?.reportStore?.dataset ??
@@ -211,10 +217,6 @@ export function computeResolvedOptions(opts) {
211
217
  const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
212
218
  const apiKey = process.env.AILF_API_KEY ?? undefined;
213
219
  // Output directory: explicit --output-dir → $CWD/.ailf/results/latest/
214
- const resolvedRepoTasksPath = opts.repoTasksPath
215
- ? resolve(callerCwd, opts.repoTasksPath)
216
- : undefined;
217
- const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
218
220
  const outputDir = resolveOutputDir(opts.outputDir);
219
221
  return {
220
222
  allowedOriginArgs,
@@ -299,6 +301,39 @@ function resolveTaskSourceType(raw) {
299
301
  console.error(`❌ Invalid --task-source "${raw}". Must be "repo" or "content-lake".`);
300
302
  process.exit(1);
301
303
  }
304
/**
 * Work out which directory holds the repo-based task definitions.
 *
 * - An explicit path always wins and is resolved against `callerCwd`.
 * - Without one, a `"repo"` task source defaults to `<callerCwd>/.ailf/tasks`
 *   (the `ailf init` scaffold location).
 * - In every other case there is no repo tasks directory.
 *
 * If the chosen directory does not exist, an explanatory message is printed
 * with `console.error` and the process exits with code 1.
 *
 * @param callerCwd Directory that relative paths are resolved against.
 * @param explicitPath Value of `--repo-tasks-path`, if provided.
 * @param taskSourceType Resolved task source type.
 * @returns Absolute path of the tasks directory, or `undefined` when neither
 *   an explicit path nor the `"repo"` source applies.
 */
function resolveRepoTasksPath(callerCwd, explicitPath, taskSourceType) {
    const hasExplicitPath = Boolean(explicitPath);
    if (!hasExplicitPath && taskSourceType !== "repo") {
        return undefined;
    }
    const tasksDir = hasExplicitPath
        ? resolve(callerCwd, explicitPath)
        : resolve(callerCwd, ".ailf", "tasks");
    if (!existsSync(tasksDir)) {
        const message = hasExplicitPath
            ? `❌ Repo tasks directory not found: ${tasksDir}\n` +
                " Provide a valid --repo-tasks-path, or run 'ailf init' to scaffold .ailf/tasks/."
            : `❌ --task-source=repo was set but no tasks directory was found.\n` +
                ` Looked for: ${tasksDir}\n` +
                " Run 'ailf init' to scaffold .ailf/tasks/, or pass --repo-tasks-path <path>.";
        console.error(message);
        process.exit(1);
    }
    return tasksDir;
}
302
337
  // ---------------------------------------------------------------------------
303
338
  // Pipeline entry point
304
339
  // ---------------------------------------------------------------------------
@@ -330,6 +365,16 @@ export async function executePipeline(cliOpts) {
330
365
  if (cliOpts.repoTasksPath) {
331
366
  config.repoTasksPath = resolve(callerCwd, cliOpts.repoTasksPath);
332
367
  }
368
+ else if (config.taskSourceType === "repo" && !config.repoTasksPath) {
369
+ // Default: when taskSource=repo but no path set, look in .ailf/tasks/
370
+ // (matches the `ailf init` scaffold location). Silent fallback here —
371
+ // composition root will surface a helpful error if the directory is
372
+ // missing.
373
+ const defaultPath = resolve(callerCwd, ".ailf", "tasks");
374
+ if (existsSync(defaultPath)) {
375
+ config.repoTasksPath = defaultPath;
376
+ }
377
+ }
333
378
  if (cliOpts.output) {
334
379
  config.outputPath = resolve(callerCwd, cliOpts.output);
335
380
  }
@@ -51,7 +51,7 @@ export function createPipelineCommand() {
51
51
  .option("--output-dir <path>", "Base directory for pipeline output artifacts (default: inferred from execution context)")
52
52
  .option("--promptfoo-url <url>", "Promptfoo share URL for report")
53
53
  .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge)", "content-lake")
54
- .option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
54
+ .option("--repo-tasks-path <path>", "Path to repo-based task definitions. Defaults to ./.ailf/tasks/ when --task-source=repo.")
55
55
  .option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
56
56
  .option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
57
57
  .option("--no-artifacts", "Disable all artifact writers (D0033). Overrides --artifacts-dir.")
@@ -1,9 +1,9 @@
1
1
  /**
2
2
  * validate-tasks command — standalone validation of task files.
3
3
  *
4
- * Validates .ailf/tasks/*.yaml files against the CanonicalTaskSchema without
5
- * running the full pipeline. Useful for pre-commit hooks and CI checks
6
- * in external repos.
4
+ * Validates .ailf/tasks/*.yaml and .ailf/tasks/*.task.ts files against the
5
+ * CanonicalTaskSchema without running the full pipeline. Useful for
6
+ * pre-commit hooks and CI checks in external repos.
7
7
  *
8
8
  * Usage:
9
9
  * ailf validate-tasks .ailf/tasks/
@@ -11,6 +11,17 @@
11
11
  *
12
12
  * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
13
13
  * @see packages/eval/src/adapters/task-sources/repo-validation.ts
14
+ * @see packages/eval/src/adapters/task-sources/task-file-loader.ts
14
15
  */
15
16
  import { Command } from "commander";
17
+ export interface ValidateTasksOptions {
18
+ strict: boolean;
19
+ callerCwd?: string;
20
+ }
16
21
  export declare function createValidateTasksCommand(): Command;
22
+ /**
23
+ * Execute the validate-tasks command logic. Returns the exit code (0 success,
24
+ * 1 failure) so callers can decide how to surface it — the CLI wrapper calls
25
+ * `process.exit`, tests can assert directly.
26
+ */
27
+ export declare function runValidateTasks(tasksPath: string, opts: ValidateTasksOptions): Promise<number>;