npm - @sanity/ailf - Versions diffs - 3.2.0 → 3.3.1 - Mend

@sanity/ailf 3.2.0 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/dist/_vendor/ailf-core/examples/index.d.ts +8 -8
package/dist/_vendor/ailf-core/examples/index.js +8 -8
package/dist/_vendor/ailf-shared/feature-flags.d.ts +59 -0
package/dist/_vendor/ailf-shared/feature-flags.js +44 -0
package/dist/_vendor/ailf-shared/index.d.ts +1 -0
package/dist/_vendor/ailf-shared/index.js +1 -0
package/dist/adapters/config-sources/ailf-resolver.d.ts +55 -0
package/dist/adapters/config-sources/ailf-resolver.js +147 -0
package/dist/adapters/config-sources/ts-config-loader.js +7 -0
package/dist/adapters/task-sources/repo-schemas.d.ts +35 -5
package/dist/adapters/task-sources/repo-schemas.js +25 -3
package/dist/adapters/task-sources/task-file-loader.js +3 -0
package/dist/commands/init.d.ts +1 -1
package/dist/commands/init.js +19 -5
package/dist/commands/pipeline-action.js +51 -6
package/dist/commands/pipeline.js +1 -1
package/dist/commands/validate-tasks.d.ts +14 -3
package/dist/commands/validate-tasks.js +125 -81
package/dist/index.d.ts +2 -0
package/dist/index.js +4 -0
package/dist/pipeline/compiler/config-loader.js +6 -1
package/dist/pipeline/compiler/preset-loader.js +3 -0
package/package.json +1 -1

package/dist/adapters/config-sources/ailf-resolver.d.ts ADDED Viewed

@@ -0,0 +1,55 @@
+/**
+ * ailf-resolver.ts — locate `@sanity/ailf` for user TS files, with a bundled fallback.
+ *
+ * User `.ailf/*.ts` files import `defineTask` / `defineConfig` / `definePreset`
+ * from `@sanity/ailf`. In a fresh project with no local install, that bare
+ * specifier cannot resolve from the user's tree. To keep `ailf init` → `ailf
+ * pipeline` working out of the box we transparently fall back to the CLI's own
+ * copy of `@sanity/ailf` by registering a jiti module alias. A user-local
+ * install always wins — the fallback kicks in only when resolution fails.
+ *
+ * All jiti callsites across the eval package use `resolveAilfAlias()` to get
+ * a consistent resolution + warning story. Callers pass the returned map (or
+ * nothing) to `createJiti`.
+ */
+/**
+ * Probe whether the user has `@sanity/ailf` installed as a local dependency
+ * reachable from the given path. Walks up the directory tree looking for a
+ * `node_modules/@sanity/ailf/package.json`. Returns the package entry point
+ * path on success, null otherwise.
+ *
+ * We intentionally do NOT use Node's `require.resolve` self-reference path:
+ * tsx and some bundler setups make it unreliable, and a self-reference
+ * would only match when the caller *is* the `@sanity/ailf` package (the
+ * monorepo development case), which is semantically the same as having no
+ * install — the bundled fallback handles it.
+ *
+ * @param fromPath - File or directory to start from. An existing directory
+ *   is used as-is; anything else (a file, or a path that does not exist)
+ *   starts the walk from its parent directory.
+ * @returns The entry path derived from the installed package's
+ *   `package.json` (`module`, then `main`, then `index.js`), or null when
+ *   no install is found or the first `package.json` found cannot be parsed.
+ */
+export declare function probeUserLocalAilf(fromPath: string): string | null;
+/**
+ * Return the path to the CLI's own bundled copy of `@sanity/ailf`. Used as the
+ * fallback target when a user's project does not have it installed.
+ *
+ * We walk the filesystem rather than `require.resolve("@sanity/ailf")` because
+ * self-reference resolution is unreliable under tsx and some bundler setups.
+ * Returns null in exotic setups where no ancestor package.json matches.
+ *
+ * @returns `<package root>/dist/index.js` when that file exists, otherwise
+ *   `<package root>/src/index.ts` when that file exists, otherwise null.
+ */
+export declare function getBundledAilfPath(): string | null;
+/**
+ * Emit a one-shot stderr advisory when the loader falls back to the bundled
+ * `@sanity/ailf`. The flag is module-scoped so a single pipeline run warns at
+ * most once, no matter how many TS files trigger the fallback.
+ *
+ * The message is written directly to `process.stderr`. Subsequent calls are
+ * no-ops until `resetBundledFallbackWarning()` clears the flag.
+ */
+export declare function warnBundledFallbackOnce(): void;
+/**
+ * Test-only: reset the warn-once flag between unit tests.
+ *
+ * After this call the next `warnBundledFallbackOnce()` writes the advisory
+ * to stderr again.
+ */
+export declare function resetBundledFallbackWarning(): void;
+/**
+ * Decide whether jiti should alias `@sanity/ailf` → bundled-path for the given
+ * file. Returns the alias map or null.
+ *
+ * - User-local resolves → returns null (jiti's natural walk finds it).
+ * - User-local fails + bundled path available → returns alias map and fires
+ *   the one-shot warning; additionally writes a per-file diagnostic line to
+ *   stderr when `AILF_LOG_LEVEL` is `verbose`.
+ * - User-local fails + no bundled path → returns null (nothing we can do;
+ *   jiti will surface the original MODULE_NOT_FOUND).
+ *
+ * @param filePath - Path of the user TS file about to be loaded; the probe
+ *   for a local install starts from this path.
+ * @returns `{ "@sanity/ailf": <bundled entry path> }` when the fallback
+ *   applies, otherwise null.
+ */
+export declare function resolveAilfAlias(filePath: string): Record<string, string> | null;

package/dist/adapters/config-sources/ailf-resolver.js ADDED Viewed

@@ -0,0 +1,147 @@
+/**
+ * ailf-resolver.ts — locate `@sanity/ailf` for user TS files, with a bundled fallback.
+ *
+ * User `.ailf/*.ts` files import `defineTask` / `defineConfig` / `definePreset`
+ * from `@sanity/ailf`. In a fresh project with no local install, that bare
+ * specifier cannot resolve from the user's tree. To keep `ailf init` → `ailf
+ * pipeline` working out of the box we transparently fall back to the CLI's own
+ * copy of `@sanity/ailf` by registering a jiti module alias. A user-local
+ * install always wins — the fallback kicks in only when resolution fails.
+ *
+ * All jiti callsites across the eval package use `resolveAilfAlias()` to get
+ * a consistent resolution + warning story. Callers pass the returned map (or
+ * nothing) to `createJiti`.
+ */
+import { existsSync, readFileSync, statSync } from "node:fs";
+import { dirname, resolve as pathResolve } from "node:path";
+import { fileURLToPath } from "node:url";
+/**
+ * Walk up from this module's location to find the enclosing `@sanity/ailf`
+ * package root. Works in both dev (source under `packages/eval/src/`) and
+ * production (compiled under `packages/eval/dist/`) because both share the
+ * same package.json anchor.
+ *
+ * Starting at the directory of `import.meta.url`, each ancestor directory is
+ * checked for a `package.json` whose `name` is exactly `@sanity/ailf`. A
+ * `package.json` that cannot be read or parsed is skipped and the walk
+ * continues upward. The loop stops on reaching the filesystem root
+ * (`dir === dirname(dir)`), so the root directory itself is not inspected.
+ *
+ * @returns The directory containing the matching `package.json`, or null
+ *   when no inspected ancestor matches.
+ */
+function findAilfPackageRoot() {
+    let dir = dirname(fileURLToPath(import.meta.url));
+    while (dir !== dirname(dir)) {
+        const pkgPath = pathResolve(dir, "package.json");
+        if (existsSync(pkgPath)) {
+            try {
+                const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
+                if (pkg.name === "@sanity/ailf")
+                    return dir;
+            }
+            catch {
+                /* ignore malformed package.json */
+            }
+        }
+        dir = dirname(dir);
+    }
+    return null;
+}
+/**
+ * Probe whether the user has `@sanity/ailf` installed as a local dependency
+ * reachable from the given path. Walks up the directory tree looking for a
+ * `node_modules/@sanity/ailf/package.json`. Returns the package entry point
+ * path on success, null otherwise.
+ *
+ * We intentionally do NOT use Node's `require.resolve` self-reference path:
+ * tsx and some bundler setups make it unreliable, and a self-reference
+ * would only match when the caller *is* the `@sanity/ailf` package (the
+ * monorepo development case), which is semantically the same as having no
+ * install — the bundled fallback handles it.
+ *
+ * Details of the walk:
+ * - The start directory is `fromPath` when it exists and is a directory;
+ *   otherwise (a file, a missing path, or a failing `statSync`) it is
+ *   `dirname(fromPath)`.
+ * - The first `package.json` found decides the result. If it cannot be read
+ *   or parsed, null is returned immediately; higher ancestors are not tried.
+ * - The entry is taken from the `module` field, then `main`, then
+ *   `"index.js"`, resolved against the package directory. The `exports` map
+ *   is not consulted and the resulting path is not checked for existence.
+ * - The loop stops on reaching the filesystem root, so a `node_modules`
+ *   directly under the root is not inspected.
+ *
+ * @param fromPath File or directory path to start probing from.
+ * @returns The derived entry path of the local install, or null.
+ */
+export function probeUserLocalAilf(fromPath) {
+    let dir;
+    try {
+        dir =
+            existsSync(fromPath) && statSync(fromPath).isDirectory()
+                ? fromPath
+                : dirname(fromPath);
+    }
+    catch {
+        dir = dirname(fromPath);
+    }
+    while (dir !== dirname(dir)) {
+        const pkgJson = pathResolve(dir, "node_modules", "@sanity", "ailf", "package.json");
+        if (existsSync(pkgJson)) {
+            try {
+                const pkg = JSON.parse(readFileSync(pkgJson, "utf-8"));
+                const entry = pkg.module ?? pkg.main ?? "index.js";
+                return pathResolve(dirname(pkgJson), entry);
+            }
+            catch {
+                return null;
+            }
+        }
+        dir = dirname(dir);
+    }
+    return null;
+}
+/**
+ * Return the path to the CLI's own bundled copy of `@sanity/ailf`. Used as the
+ * fallback target when a user's project does not have it installed.
+ *
+ * We walk the filesystem rather than `require.resolve("@sanity/ailf")` because
+ * self-reference resolution is unreliable under tsx and some bundler setups.
+ * Returns null in exotic setups where no ancestor package.json matches.
+ *
+ * The compiled entry is preferred: `dist/index.js` is returned whenever it
+ * exists, and `src/index.ts` is only used when the compiled file is absent.
+ *
+ * @returns `<package root>/dist/index.js`, else `<package root>/src/index.ts`,
+ *   else null when the package root or both entry files cannot be found.
+ */
+export function getBundledAilfPath() {
+    const pkgRoot = findAilfPackageRoot();
+    if (!pkgRoot)
+        return null;
+    // Production layout: packages/eval/dist/index.js
+    const distEntry = pathResolve(pkgRoot, "dist", "index.js");
+    if (existsSync(distEntry))
+        return distEntry;
+    // Development layout (tsx on source): packages/eval/src/index.ts
+    const srcEntry = pathResolve(pkgRoot, "src", "index.ts");
+    if (existsSync(srcEntry))
+        return srcEntry;
+    return null;
+}
+let hasWarnedOnce = false;
+/**
+ * Emit a one-shot stderr advisory when the loader falls back to the bundled
+ * `@sanity/ailf`. The flag is module-scoped so a single pipeline run warns at
+ * most once, no matter how many TS files trigger the fallback.
+ *
+ * The `hasWarnedOnce` flag is set before the message is written, and the
+ * message goes directly to `process.stderr`. Later calls return immediately
+ * until `resetBundledFallbackWarning()` clears the flag.
+ */
+export function warnBundledFallbackOnce() {
+    if (hasWarnedOnce)
+        return;
+    hasWarnedOnce = true;
+    process.stderr.write("  ⚠ @sanity/ailf is not installed in your project — using the CLI's bundled copy.\n" +
+        "    Pin it locally for reproducibility: npm install -D @sanity/ailf\n");
+}
+/**
+ * Test-only: reset the warn-once flag between unit tests.
+ *
+ * Sets the module-scoped `hasWarnedOnce` flag back to false so the next
+ * `warnBundledFallbackOnce()` call writes the advisory again.
+ */
+export function resetBundledFallbackWarning() {
+    hasWarnedOnce = false;
+}
+/**
+ * Decide whether jiti should alias `@sanity/ailf` → bundled-path for the given
+ * file. Returns the alias map or null.
+ *
+ * - User-local resolves → returns null (jiti's natural walk finds it).
+ * - User-local fails + bundled path available → returns alias map and fires
+ *   the one-shot warning.
+ * - User-local fails + no bundled path → returns null (nothing we can do;
+ *   jiti will surface the original MODULE_NOT_FOUND).
+ *
+ * Per-file `[ts-loader]` diagnostics are written to stderr only when the
+ * `AILF_LOG_LEVEL` environment variable equals `verbose`. The one-shot
+ * fallback warning is independent of that setting. Nothing is logged when
+ * neither a local install nor a bundled copy is found.
+ *
+ * @param filePath Path of the user TS file about to be loaded; passed to
+ *   `probeUserLocalAilf` as the starting point of the probe.
+ * @returns `{ "@sanity/ailf": <bundled entry path> }` when the fallback
+ *   applies, otherwise null.
+ */
+export function resolveAilfAlias(filePath) {
+    const userLocal = probeUserLocalAilf(filePath);
+    const verbose = process.env.AILF_LOG_LEVEL === "verbose";
+    if (userLocal) {
+        if (verbose) {
+            process.stderr.write(`  [ts-loader] ${filePath} → @sanity/ailf resolved locally at ${userLocal}\n`);
+        }
+        return null;
+    }
+    const bundled = getBundledAilfPath();
+    if (!bundled)
+        return null;
+    if (verbose) {
+        process.stderr.write(`  [ts-loader] ${filePath} → @sanity/ailf not installed locally; using bundled copy at ${bundled}\n`);
+    }
+    warnBundledFallbackOnce();
+    return { "@sanity/ailf": bundled };
+}

package/dist/adapters/config-sources/ts-config-loader.js CHANGED Viewed

@@ -15,6 +15,7 @@
 import { existsSync } from "fs";
 import { pathToFileURL } from "node:url";
 import { createJiti } from "jiti";
+import { resolveAilfAlias } from "./ailf-resolver.js";
 // ---------------------------------------------------------------------------
 // jiti instance factory — resolves imports relative to the loaded file
 // ---------------------------------------------------------------------------
@@ -28,13 +29,19 @@ import { createJiti } from "jiti";
  *
  * We pass a `file://` URL (not a bare path) so jiti uses ESM resolution,
  * which matches the `"import"` condition in package.json exports maps.
+ *
+ * When the user's project cannot resolve `@sanity/ailf` (fresh directory
+ * without a local install), we register an alias pointing at the CLI's own
+ * bundled copy so the load still succeeds. See `ailf-resolver.ts`.
  */
 function createJitiForFile(filePath) {
+    const alias = resolveAilfAlias(filePath); // null when a local install exists or no bundled copy is available
     return createJiti(pathToFileURL(filePath).href, {
         // Interop: handle both `export default` and `module.exports`
         interopDefault: true,
         // Enable jiti's require/module cache. NOTE(review): this flag does not
         // control file-extension handling — confirm semantics for the jiti
         // version in use.
         requireCache: true,
+        ...(alias ? { alias } : {}),
     });
 }
 /**

package/dist/adapters/task-sources/repo-schemas.d.ts CHANGED Viewed

@@ -19,15 +19,17 @@ import { z } from "zod";
 /**
  * The set of assertion types allowed in task files.
  *
- * This is a curated subset of Promptfoo assertion types — we expose only the
- * types that are stable, well-documented, and useful for external authors.
+ * Combines a curated subset of Promptfoo assertion types (stable, well-
+ * documented, useful for external authors) with the agent-harness-specific
+ * types mapped by `mode-handlers/agent-harness/assertions.ts`.
  */
-export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency"];
+export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency", "file-exists", "file-contains", "command-succeeds", "diff-matches"];
 export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
 /**
- * Valid rubric template names — must match keys in config/rubrics.yaml.
+ * Valid rubric template names — must match template keys in
+ * `packages/eval/config/rubrics.ts`.
  */
-export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage"];
+export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage", "mcp-input-validation", "mcp-output-correctness", "mcp-error-handling", "mcp-security", "factual-correctness", "completeness", "currency", "process-quality", "agent-output", "agent-tool-usage"];
 export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
 /**
  * Zod schema for a single task definition using canonical field names.
@@ -84,6 +86,16 @@ export declare const CanonicalTaskSchema: z.ZodObject<{
             "task-completion": "task-completion";
             "code-correctness": "code-correctness";
             "doc-coverage": "doc-coverage";
+            "mcp-input-validation": "mcp-input-validation";
+            "mcp-output-correctness": "mcp-output-correctness";
+            "mcp-error-handling": "mcp-error-handling";
+            "mcp-security": "mcp-security";
+            "factual-correctness": "factual-correctness";
+            completeness: "completeness";
+            currency: "currency";
+            "process-quality": "process-quality";
+            "agent-output": "agent-output";
+            "agent-tool-usage": "agent-tool-usage";
         }>;
         criteria: z.ZodArray<z.ZodString>;
         weight: z.ZodOptional<z.ZodNumber>;
@@ -101,6 +113,10 @@ export declare const CanonicalTaskSchema: z.ZodObject<{
             similar: "similar";
             cost: "cost";
             latency: "latency";
+            "file-exists": "file-exists";
+            "file-contains": "file-contains";
+            "command-succeeds": "command-succeeds";
+            "diff-matches": "diff-matches";
         }>;
         value: z.ZodOptional<z.ZodUnknown>;
         threshold: z.ZodOptional<z.ZodNumber>;
@@ -174,6 +190,16 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodObject<{
             "task-completion": "task-completion";
             "code-correctness": "code-correctness";
             "doc-coverage": "doc-coverage";
+            "mcp-input-validation": "mcp-input-validation";
+            "mcp-output-correctness": "mcp-output-correctness";
+            "mcp-error-handling": "mcp-error-handling";
+            "mcp-security": "mcp-security";
+            "factual-correctness": "factual-correctness";
+            completeness: "completeness";
+            currency: "currency";
+            "process-quality": "process-quality";
+            "agent-output": "agent-output";
+            "agent-tool-usage": "agent-tool-usage";
         }>;
         criteria: z.ZodArray<z.ZodString>;
         weight: z.ZodOptional<z.ZodNumber>;
@@ -191,6 +217,10 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodObject<{
             similar: "similar";
             cost: "cost";
             latency: "latency";
+            "file-exists": "file-exists";
+            "file-contains": "file-contains";
+            "command-succeeds": "command-succeeds";
+            "diff-matches": "diff-matches";
         }>;
         value: z.ZodOptional<z.ZodUnknown>;
         threshold: z.ZodOptional<z.ZodNumber>;

package/dist/adapters/task-sources/repo-schemas.js CHANGED Viewed

@@ -22,8 +22,9 @@ import { z } from "zod";
 /**
  * The set of assertion types allowed in task files.
  *
- * This is a curated subset of Promptfoo assertion types — we expose only the
- * types that are stable, well-documented, and useful for external authors.
+ * Combines a curated subset of Promptfoo assertion types (stable, well-
+ * documented, useful for external authors) with the agent-harness-specific
+ * types mapped by `mode-handlers/agent-harness/assertions.ts`.
  */
 export const CURATED_ASSERTION_TYPES = [
     "llm-rubric",
@@ -38,14 +39,35 @@ export const CURATED_ASSERTION_TYPES = [
     "similar",
     "cost",
     "latency",
+    // Agent-harness assertions — verify sandbox state after the agent runs.
+    // See src/pipeline/compiler/mode-handlers/agent-harness/assertions.ts
+    "file-exists",
+    "file-contains",
+    "command-succeeds",
+    "diff-matches",
 ];
 /**
- * Valid rubric template names — must match keys in config/rubrics.yaml.
+ * Valid rubric template names — must match template keys in
+ * `packages/eval/config/rubrics.ts`.
  */
 export const RUBRIC_TEMPLATE_NAMES = [
+    // Core literacy dimensions
     "task-completion",
     "code-correctness",
     "doc-coverage",
+    // MCP server dimensions
+    "mcp-input-validation",
+    "mcp-output-correctness",
+    "mcp-error-handling",
+    "mcp-security",
+    // Knowledge probe dimensions
+    "factual-correctness",
+    "completeness",
+    "currency",
+    // Agent harness dimensions
+    "process-quality",
+    "agent-output",
+    "agent-tool-usage",
 ];
 // ---------------------------------------------------------------------------
 // Doc ref schemas — polymorphic canonical doc references

package/dist/adapters/task-sources/task-file-loader.js CHANGED Viewed

@@ -25,6 +25,7 @@ import { existsSync, readdirSync } from "fs";
 import { pathToFileURL } from "node:url";
 import { resolve } from "path";
 import { createJiti } from "jiti";
+import { resolveAilfAlias } from "../config-sources/ailf-resolver.js";
 import { loadTsConfig } from "../config-sources/ts-config-loader.js";
 /**
  * Discover TS/JS task files in a directory.
@@ -72,9 +73,11 @@ export async function loadTsTaskFile(filePath) {
  * Needed by resolve-mappings.ts which is called from sync contexts.
  */
 export function loadTsTaskFileSync(filePath) {
+    const alias = resolveAilfAlias(filePath);
     const jiti = createJiti(pathToFileURL(filePath).href, {
         interopDefault: true,
         requireCache: true,
+        ...(alias ? { alias } : {}),
     });
     const mod = jiti(filePath);
     const value = mod && typeof mod === "object" && "default" in mod ? mod.default : mod;

package/dist/commands/init.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * task files. The generated files are ready-to-edit starting points —
  * not live evaluation tasks.
  *
- * TypeScript output (default) uses define* helpers from @sanity/ailf-core
+ * TypeScript output (default) uses define* helpers from @sanity/ailf
  * for full IDE autocomplete and type checking. YAML output serializes the
  * parsed task data. JSON output is a plain serialization of the parsed data.
  *

package/dist/commands/init.js CHANGED Viewed

@@ -5,7 +5,7 @@
  * task files. The generated files are ready-to-edit starting points —
  * not live evaluation tasks.
  *
- * TypeScript output (default) uses define* helpers from @sanity/ailf-core
+ * TypeScript output (default) uses define* helpers from @sanity/ailf
  * for full IDE autocomplete and type checking. YAML output serializes the
  * parsed task data. JSON output is a plain serialization of the parsed data.
  *
@@ -20,6 +20,7 @@ import { Command } from "commander";
 import { existsSync, mkdirSync, writeFileSync } from "fs";
 import { resolve, relative } from "path";
 import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES, TASK_EXAMPLES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
+import { probeUserLocalAilf } from "../adapters/config-sources/ailf-resolver.js";
 // ---------------------------------------------------------------------------
 // Command factory
 // ---------------------------------------------------------------------------
@@ -82,6 +83,13 @@ async function runInit(opts) {
     console.log();
     console.log("  🚀 Initializing AI Literacy Framework");
     console.log();
+    if (format === "ts" && !probeUserLocalAilf(targetDir)) {
+        console.log("  ℹ @sanity/ailf is not installed in this project yet.");
+        console.log("    For reproducibility and IDE autocomplete, install it after init:");
+        console.log("      npm install -D @sanity/ailf   (or pnpm add -D, yarn add -D)");
+        console.log("    The pipeline will fall back to the CLI's bundled copy until you do.");
+        console.log();
+    }
     // 1. Create directories
     mkdirSync(tasksDir, { recursive: true });
     console.log(`  ✓ Created ${rel(targetDir, ailfDir)}/`);
@@ -252,13 +260,13 @@ async function runInit(opts) {
     console.log(`  2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/`);
     console.log("  3. Add a GitHub Actions secret");
     console.log("     (Settings → Secrets and variables → Actions):");
-    console.log("     • AILF_API_KEY — your API key (starts with ailf_live_sk_)");
+    console.log("     • AILF_API_KEY — your API key");
     console.log("  4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
     console.log("     automatically on PRs");
     if (format === "ts") {
         console.log();
         console.log(`  💡 TypeScript tasks (${taskExt}) give you full IDE autocomplete`);
-        console.log("     via defineTask() from @sanity/ailf-core.");
+        console.log("     via defineTask() from @sanity/ailf.");
     }
     console.log();
     console.log("  🔑 Retrieve the API key from 1Password (Sanity employees):");
@@ -268,9 +276,15 @@ async function runInit(opts) {
     console.log();
     console.log("     Not a Sanity employee? Request an API key from the AILF team.");
     console.log();
-    console.log("  💡 Test locally before pushing:");
+    console.log("  💡 Test a remote run (executes against the AILF API) before pushing:");
     console.log("     AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
     console.log();
+    console.log("  💡 Or test a remote run against your repo tasks:");
+    console.log("     AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --task-source=repo --debug");
+    console.log();
+    console.log("  💡 Or run locally against your repo tasks:");
+    console.log("     AILF_API_KEY=... npx @sanity/ailf@latest pipeline --mode=literacy --variant=full --task-source=repo --debug --explain -y");
+    console.log();
 }
 // ---------------------------------------------------------------------------
 // Custom preset scaffold template
@@ -289,7 +303,7 @@ const CUSTOM_PRESET_TS = `/**
  * @see https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/presets.md
  */
-import { definePreset } from "../_vendor/ailf-core/index.js"
+import { definePreset } from "@sanity/ailf"
 export default definePreset({
   name: "my-docs-evaluation",

package/dist/commands/pipeline-action.js CHANGED Viewed

@@ -191,12 +191,18 @@ export function computeResolvedOptions(opts) {
         // Smart default: full runs auto-publish when store is configured
         publishEnabled = reportStoreConfigured && !debugEnabled;
     }
+    // Resolve task source + repo tasks path before anything that depends on
+    // them (report store overrides, output dir). When --task-source=repo is
+    // set without --repo-tasks-path, default to ./.ailf/tasks/ — the location
+    // created by `ailf init`.
+    const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
+    const resolvedRepoTasksPath = resolveRepoTasksPath(callerCwd, opts.repoTasksPath, resolvedTaskSourceType);
     // Report store overrides — resolution order:
     //   1. Explicit CLI flags (--report-dataset, --report-project)
     //   2. Environment variables (AILF_REPORT_DATASET, AILF_REPORT_PROJECT_ID)
-    //   3. .ailf/config.yaml reportStore block (when --repo-tasks-path is set)
+    //   3. .ailf/config.yaml reportStore block (when repo tasks path is set)
     //   4. Eval dataset override (so perspective evals publish to the same dataset)
-    const repoConfig = loadRepoConfigIfPresent(opts.repoTasksPath);
+    const repoConfig = loadRepoConfigIfPresent(resolvedRepoTasksPath);
     const reportDataset = opts.reportDataset ??
         process.env.AILF_REPORT_DATASET ??
         repoConfig?.reportStore?.dataset ??
@@ -211,10 +217,6 @@ export function computeResolvedOptions(opts) {
     const apiUrl = opts.apiUrl ?? process.env.AILF_API_URL ?? "https://ailf-api.sanity.build";
     const apiKey = process.env.AILF_API_KEY ?? undefined;
     // Output directory: explicit --output-dir → $CWD/.ailf/results/latest/
-    const resolvedRepoTasksPath = opts.repoTasksPath
-        ? resolve(callerCwd, opts.repoTasksPath)
-        : undefined;
-    const resolvedTaskSourceType = resolveTaskSourceType(opts.taskSource);
     const outputDir = resolveOutputDir(opts.outputDir);
     return {
         allowedOriginArgs,
@@ -299,6 +301,39 @@ function resolveTaskSourceType(raw) {
     console.error(`❌ Invalid --task-source "${raw}". Must be "repo" or "content-lake".`);
     process.exit(1);
 }
+/**
+ * Resolve the repo tasks path.
+ *
+ * - Explicit `--repo-tasks-path` wins (resolved relative to callerCwd).
+ * - When `--task-source=repo` is set without a path, defaults to
+ *   `./.ailf/tasks/` in callerCwd — the location created by `ailf init`.
+ * - Otherwise returns undefined (Content Lake source).
+ *
+ * Exits with a helpful error when an explicit path doesn't exist, or when
+ * the repo source was requested but no tasks directory can be found.
+ *
+ * Only existence is checked (`existsSync`); the path is not verified to be a
+ * directory. Both failure cases print to stderr and call `process.exit(1)`
+ * rather than throwing.
+ *
+ * @param callerCwd Base directory that relative paths are resolved against.
+ * @param explicitPath Value of `--repo-tasks-path`; any falsy value
+ *   (including an empty string) is treated as "not provided".
+ * @param taskSourceType Resolved task source; the default path is only used
+ *   when this is `"repo"`.
+ * @returns The absolute tasks path, or undefined when no explicit path was
+ *   given and the task source is not `"repo"`.
+ */
+function resolveRepoTasksPath(callerCwd, explicitPath, taskSourceType) {
+    if (explicitPath) {
+        const abs = resolve(callerCwd, explicitPath);
+        if (!existsSync(abs)) {
+            console.error(`❌ Repo tasks directory not found: ${abs}\n` +
+                "   Provide a valid --repo-tasks-path, or run 'ailf init' to scaffold .ailf/tasks/.");
+            process.exit(1);
+        }
+        return abs;
+    }
+    if (taskSourceType === "repo") {
+        const defaultPath = resolve(callerCwd, ".ailf", "tasks");
+        if (!existsSync(defaultPath)) {
+            console.error(`❌ --task-source=repo was set but no tasks directory was found.\n` +
+                `   Looked for: ${defaultPath}\n` +
+                "   Run 'ailf init' to scaffold .ailf/tasks/, or pass --repo-tasks-path <path>.");
+            process.exit(1);
+        }
+        return defaultPath;
+    }
+    return undefined;
+}
 // ---------------------------------------------------------------------------
 // Pipeline entry point
 // ---------------------------------------------------------------------------
@@ -330,6 +365,16 @@ export async function executePipeline(cliOpts) {
         if (cliOpts.repoTasksPath) {
             config.repoTasksPath = resolve(callerCwd, cliOpts.repoTasksPath);
         }
+        else if (config.taskSourceType === "repo" && !config.repoTasksPath) {
+            // Default: when taskSource=repo but no path set, look in .ailf/tasks/
+            // (matches the `ailf init` scaffold location). Silent fallback here —
+            // composition root will surface a helpful error if the directory is
+            // missing.
+            const defaultPath = resolve(callerCwd, ".ailf", "tasks");
+            if (existsSync(defaultPath)) {
+                config.repoTasksPath = defaultPath;
+            }
+        }
         if (cliOpts.output) {
             config.outputPath = resolve(callerCwd, cliOpts.output);
         }

package/dist/commands/pipeline.js CHANGED Viewed

@@ -51,7 +51,7 @@ export function createPipelineCommand() {
         .option("--output-dir <path>", "Base directory for pipeline output artifacts (default: inferred from execution context)")
         .option("--promptfoo-url <url>", "Promptfoo share URL for report")
         .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge)", "content-lake")
-        .option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
+        .option("--repo-tasks-path <path>", "Path to repo-based task definitions. Defaults to ./.ailf/tasks/ when --task-source=repo.")
         .option("--remote", "Submit evaluation to the AILF API instead of running locally", false)
         .option("--api-url <url>", "AILF API base URL (default: https://ailf-api.sanity.build)")
         .option("--no-artifacts", "Disable all artifact writers (D0033). Overrides --artifacts-dir.")

package/dist/commands/validate-tasks.d.ts CHANGED Viewed

@@ -1,9 +1,9 @@
 /**
  * validate-tasks command — standalone validation of task files.
  *
- * Validates .ailf/tasks/*.yaml files against the CanonicalTaskSchema without
- * running the full pipeline. Useful for pre-commit hooks and CI checks
- * in external repos.
+ * Validates .ailf/tasks/*.yaml and .ailf/tasks/*.task.ts files against the
+ * CanonicalTaskSchema without running the full pipeline. Useful for
+ * pre-commit hooks and CI checks in external repos.
  *
  * Usage:
  *   ailf validate-tasks .ailf/tasks/
@@ -11,6 +11,17 @@
  *
  * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
  * @see packages/eval/src/adapters/task-sources/repo-validation.ts
+ * @see packages/eval/src/adapters/task-sources/task-file-loader.ts
  */
 import { Command } from "commander";
+export interface ValidateTasksOptions {
+    strict: boolean;
+    callerCwd?: string;
+}
 export declare function createValidateTasksCommand(): Command;
+/**
+ * Execute the validate-tasks command logic. Returns the exit code (0 success,
+ * 1 failure) so callers can decide how to surface it — the CLI wrapper calls
+ * `process.exit`, tests can assert directly.
+ */
+export declare function runValidateTasks(tasksPath: string, opts: ValidateTasksOptions): Promise<number>;