npm - @sanity/ailf - Versions diffs - 3.3.0 → 3.4.0 - Mend

@sanity/ailf 3.3.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/dist/_vendor/ailf-core/artifact-registry.js +76 -0
package/dist/_vendor/ailf-shared/feature-flags.d.ts +59 -0
package/dist/_vendor/ailf-shared/feature-flags.js +44 -0
package/dist/_vendor/ailf-shared/index.d.ts +1 -0
package/dist/_vendor/ailf-shared/index.js +1 -0
package/dist/adapters/config-sources/ailf-resolver.d.ts +55 -0
package/dist/adapters/config-sources/ailf-resolver.js +147 -0
package/dist/adapters/config-sources/ts-config-loader.js +7 -0
package/dist/adapters/task-sources/repo-schemas.d.ts +35 -5
package/dist/adapters/task-sources/repo-schemas.js +25 -3
package/dist/adapters/task-sources/task-file-loader.js +3 -0
package/dist/commands/init.js +16 -2
package/dist/pipeline/compiler/config-loader.js +6 -1
package/dist/pipeline/compiler/preset-loader.js +3 -0
package/package.json +1 -1

package/dist/_vendor/ailf-core/artifact-registry.js CHANGED Viewed

@@ -320,6 +320,32 @@ const graderPromptPreviewSchema = z.object({
     rubricName: z.string().max(60).optional(),
     snippet: z.string().max(120),
 });
+/**
+ * Preview shape for the run-scoped `pipelineContext` bulk artifact (W0063 /
+ * D0033 M7). Lets the Studio Overview tab render a Pipeline Execution header
+ * row (step count, wall-clock, failed-step badge, quality-gate badge, cache
+ * hit count) without fetching the full context payload — `config` and per-
+ * step detail only land when the panel is expanded.
+ *
+ * Bounds chosen so the worst-case preview fits comfortably under 384 bytes:
+ *   - `failedSteps` is capped at 5 entries with each name ≤ 40 chars. Real
+ *     step names ("fetch-docs", "calculate-scores", "gap-analysis") are 10–
+ *     25 chars; 40 is a defensive ceiling. The array cap exists because
+ *     `fitPreviewToCap` only shortens string fields — an unbounded array
+ *     could push the preview over cap and force it to drop entirely.
+ *     5 is a triage ceiling: the panel shows "showed 5 of N failed steps"
+ *     when `failedSteps.length < stepCount - successCount`, and the full
+ *     per-step list is available in the drilldown payload.
+ *   - `belowCritical` and `cacheHits` are optional — absent on old runs,
+ *     skipped pipelines, or runs without remote-cache telemetry.
+ *
+ * Fields as validated below:
+ *   - `stepCount`: non-negative integer.
+ *   - `totalDurationMs`: non-negative number (not required to be an integer).
+ *   - `failedSteps`: at most 5 strings, each at most 40 characters.
+ *   - `belowCritical`: optional boolean.
+ *   - `cacheHits`: optional non-negative integer.
+ *
+ * NOTE(review): the "showed 5 of N" rule above refers to `successCount`,
+ * which is not a field of this schema — confirm where the panel derives it.
+ */
+const pipelineContextPreviewSchema = z.object({
+    stepCount: z.number().int().nonnegative(),
+    totalDurationMs: z.number().nonnegative(),
+    failedSteps: z.array(z.string().max(40)).max(5),
+    belowCritical: z.boolean().optional(),
+    cacheHits: z.number().int().nonnegative().optional(),
+});
 // Aspirational: most payload shapes are still loose. Tightening per-type as
 // consumers stabilize is explicitly a W0050/W0051 concern — W0049 fixes the
 // structural shape around them without changing the payload contracts.
@@ -495,6 +521,56 @@ export const ARTIFACT_REGISTRY = {
         entrySchema: unknownEntry,
         mime: "application/json",
         capBytes: 64_000,
+        manifestPreview: {
+            schema: pipelineContextPreviewSchema,
+            extract: (entry) => {
+                // Builds the small preview object from the full context payload.
+                // Producer shape from `capturePipelineContext` in
+                // packages/eval/src/orchestration/pipeline-orchestrator.ts:
+                //   { config, state: { belowCritical, remoteCacheHits, ... },
+                //     steps: [{ name, status: "success"|"failed"|"skipped",
+                //               durationMs? }] }
+                // `config` and everything else on `state` are drilldown-only and
+                // intentionally absent from the preview — they're what the panel
+                // fetches lazily when expanded.
+                const e = entry;
+                const stepsRaw = Array.isArray(e.steps) ? e.steps : []; // a non-array `steps` is treated as no steps
+                let totalDurationMs = 0;
+                const failedSteps = [];
+                let stepCount = 0;
+                for (const raw of stepsRaw) {
+                    if (raw === null || typeof raw !== "object")
+                        continue; // non-object entries are skipped and do not count toward stepCount
+                    stepCount += 1;
+                    const s = raw;
+                    if (typeof s.durationMs === "number" &&
+                        Number.isFinite(s.durationMs) &&
+                        s.durationMs >= 0) { // missing, NaN/Infinity and negative durations are left out of the total
+                        totalDurationMs += s.durationMs;
+                    }
+                    if (s.status === "failed" &&
+                        typeof s.name === "string" &&
+                        failedSteps.length < 5) { // 5 and 40 mirror the limits in pipelineContextPreviewSchema
+                        failedSteps.push(truncateString(s.name, 40)); // `truncateString` is defined outside this hunk
+                    }
+                }
+                const belowCritical = typeof e.state?.belowCritical === "boolean"
+                    ? e.state.belowCritical
+                    : undefined;
+                const cacheHitsRaw = e.state?.remoteCacheHits;
+                const cacheHits = Array.isArray(cacheHitsRaw)
+                    ? cacheHitsRaw.length
+                    : undefined; // the preview carries only the count of cache hits, and only when the source is an array
+                return {
+                    stepCount,
+                    totalDurationMs,
+                    failedSteps,
+                    ...(belowCritical === undefined ? {} : { belowCritical }), // optional keys are omitted rather than set to undefined
+                    ...(cacheHits === undefined ? {} : { cacheHits }),
+                };
+            },
+            capBytes: 384,
+        },
     }),
     documentManifest: buildDescriptor({
         type: "documentManifest",

package/dist/_vendor/ailf-shared/feature-flags.d.ts ADDED Viewed

@@ -0,0 +1,59 @@
+/**
+ * Feature flags — compile-time UI/feature visibility toggles.
+ *
+ * Single source of truth for "temporary" flags that hide in-flight features,
+ * gate partially-built panels, or carry a known rollback. Each entry carries
+ * the metadata needed to answer "why is this off and when can it go?" so
+ * flags don't rot into undiscoverable tombstones.
+ *
+ * This is intentionally NOT a runtime feature-flag system — no user
+ * segmentation, no A/B, no env-var overrides. Just a typed map of booleans
+ * with audit metadata. Flipping a flag is a code change.
+ *
+ * Adding a flag:
+ *   1. Add an entry below with every metadata field populated.
+ *   2. Import `FEATURE_FLAGS` at the call site and read `.enabled`.
+ *   3. When the re-enable condition is met, remove the entry and the gate.
+ *
+ * See docs/guides/feature-flags.md for the full lifecycle.
+ */
+/**
+ * Shape of a single feature-flag entry. All fields required.
+ * Every property is declared `readonly`.
+ */
+export interface FeatureFlag {
+    /** Whether the gated feature is visible / active. */
+    readonly enabled: boolean;
+    /** Why the flag exists. Answers "what problem did turning this off solve?" */
+    readonly rationale: string;
+    /** The condition under which this flag should be re-enabled or removed. */
+    readonly reEnableWhen: string;
+    /** ID of the work item that owns the flag's resolution, or null if none. The type accepts any string that starts with "W". */
+    readonly relatedWorkItem: `W${string}` | null;
+    /** ISO 8601 date (YYYY-MM-DD) the flag was introduced. Used for staleness audits. Typed as a plain string, so the format is not checked by the compiler. */
+    readonly addedAt: string;
+}
+/**
+ * Registry of all active feature flags across AILF packages.
+ *
+ * Consumers read values directly:
+ *   if (FEATURE_FLAGS.showFailureModes.enabled) { ... }
+ *
+ * Adding a key here extends the `FeatureFlagKey` union automatically; typos
+ * at call sites fail at compile time.
+ *
+ * Each entry is declared with literal types, so `enabled` is the literal
+ * `false` for both flags listed here, not the wider `boolean`.
+ */
+export declare const FEATURE_FLAGS: {
+    readonly showFailureModes: {
+        readonly enabled: false;
+        readonly rationale: "Current classification is too broad (majority \"Unclassified\") to be actionable in the diagnostics view.";
+        readonly reEnableWhen: "Failure taxonomy is refined so non-Unclassified buckets carry meaningful signal.";
+        readonly relatedWorkItem: "W0037-detect-model-output-failures";
+        readonly addedAt: "2026-04-22";
+    };
+    readonly showRegressedSinceLastRun: {
+        readonly enabled: false;
+        readonly rationale: "Bare list of regressed area names lacks explanatory context for why each regressed.";
+        readonly reEnableWhen: "Per-area regression attribution can be surfaced alongside the list.";
+        readonly relatedWorkItem: null;
+        readonly addedAt: "2026-04-22";
+    };
+};
+/**
+ * Union of all registered flag keys. Typos at call sites fail at compile time.
+ * With the registry declared in this file it is
+ * `"showFailureModes" | "showRegressedSinceLastRun"`.
+ */
+export type FeatureFlagKey = keyof typeof FEATURE_FLAGS;

package/dist/_vendor/ailf-shared/feature-flags.js ADDED Viewed

@@ -0,0 +1,44 @@
+/**
+ * Feature flags — compile-time UI/feature visibility toggles.
+ *
+ * Single source of truth for "temporary" flags that hide in-flight features,
+ * gate partially-built panels, or carry a known rollback. Each entry carries
+ * the metadata needed to answer "why is this off and when can it go?" so
+ * flags don't rot into undiscoverable tombstones.
+ *
+ * This is intentionally NOT a runtime feature-flag system — no user
+ * segmentation, no A/B, no env-var overrides. Just a typed map of booleans
+ * with audit metadata. Flipping a flag is a code change.
+ *
+ * Adding a flag:
+ *   1. Add an entry below with every metadata field populated.
+ *   2. Import `FEATURE_FLAGS` at the call site and read `.enabled`.
+ *   3. When the re-enable condition is met, remove the entry and the gate.
+ *
+ * See docs/guides/feature-flags.md for the full lifecycle.
+ */
+/**
+ * Registry of all active feature flags across AILF packages.
+ *
+ * Consumers read values directly:
+ *   if (FEATURE_FLAGS.showFailureModes.enabled) { ... }
+ *
+ * Adding a key here extends the `FeatureFlagKey` union automatically; typos
+ * at call sites fail at compile time.
+ *
+ * Both flags defined here are currently disabled (`enabled: false`).
+ *
+ * NOTE(review): this compiled output is a plain object literal and is not
+ * passed through `Object.freeze`, so nothing in this file prevents mutation
+ * at runtime — presumably immutability relies on the typings; confirm.
+ */
+export const FEATURE_FLAGS = {
+    showFailureModes: {
+        enabled: false,
+        rationale: 'Current classification is too broad (majority "Unclassified") to be actionable in the diagnostics view.',
+        reEnableWhen: "Failure taxonomy is refined so non-Unclassified buckets carry meaningful signal.",
+        relatedWorkItem: "W0037-detect-model-output-failures",
+        addedAt: "2026-04-22",
+    },
+    showRegressedSinceLastRun: {
+        enabled: false,
+        rationale: "Bare list of regressed area names lacks explanatory context for why each regressed.",
+        reEnableWhen: "Per-area regression attribution can be surfaced alongside the list.",
+        relatedWorkItem: null,
+        addedAt: "2026-04-22",
+    },
+};

package/dist/_vendor/ailf-shared/index.d.ts CHANGED Viewed

@@ -10,6 +10,7 @@
  * @sanity/ailf-studio. It is the leaf of the dependency graph.
  */
 export * from "./document-ref.js";
+export * from "./feature-flags.js";
 export * from "./score-grades.js";
 export * from "./noise-threshold.js";
 export * from "./eval-modes.js";

package/dist/_vendor/ailf-shared/index.js CHANGED Viewed

@@ -10,6 +10,7 @@
  * @sanity/ailf-studio. It is the leaf of the dependency graph.
  */
 export * from "./document-ref.js";
+export * from "./feature-flags.js";
 export * from "./score-grades.js";
 export * from "./noise-threshold.js";
 export * from "./eval-modes.js";

package/dist/adapters/config-sources/ailf-resolver.d.ts ADDED Viewed

@@ -0,0 +1,55 @@
+/**
+ * ailf-resolver.ts — locate `@sanity/ailf` for user TS files, with a bundled fallback.
+ *
+ * User `.ailf/*.ts` files import `defineTask` / `defineConfig` / `definePreset`
+ * from `@sanity/ailf`. In a fresh project with no local install, that bare
+ * specifier cannot resolve from the user's tree. To keep `ailf init` → `ailf
+ * pipeline` working out of the box we transparently fall back to the CLI's own
+ * copy of `@sanity/ailf` by registering a jiti module alias. A user-local
+ * install always wins — the fallback kicks in only when resolution fails.
+ *
+ * All jiti callsites across the eval package use `resolveAilfAlias()` to get
+ * a consistent resolution + warning story. Callers pass the returned map (or
+ * nothing) to `createJiti`.
+ */
+/**
+ * Probe whether the user has `@sanity/ailf` installed as a local dependency
+ * reachable from the given path. Walks up the directory tree looking for a
+ * `node_modules/@sanity/ailf/package.json`. Returns the package entry point
+ * path on success, null otherwise.
+ *
+ * We intentionally do NOT use Node's `require.resolve` self-reference path:
+ * tsx and some bundler setups make it unreliable, and a self-reference
+ * would only match when the caller *is* the `@sanity/ailf` package (the
+ * monorepo devving case), which is semantically the same as having no
+ * install — the bundled fallback handles it.
+ *
+ * @param fromPath File or directory path the upward search starts from.
+ * @returns Path to the located package's entry point, or null when no
+ *   local install is found.
+ */
+export declare function probeUserLocalAilf(fromPath: string): string | null;
+/**
+ * Return the path to the CLI's own bundled copy of `@sanity/ailf`. Used as the
+ * fallback target when a user's project does not have it installed.
+ *
+ * We walk the filesystem rather than `require.resolve("@sanity/ailf")` because
+ * self-reference resolution is unreliable under tsx and some bundler setups.
+ * Returns null in exotic setups where no ancestor package.json matches.
+ *
+ * @returns Path to the bundled entry file, or null when it cannot be located.
+ */
+export declare function getBundledAilfPath(): string | null;
+/**
+ * Emit a one-shot stderr advisory when the loader falls back to the bundled
+ * `@sanity/ailf`. The flag is module-scoped so a single pipeline run warns at
+ * most once, no matter how many TS files trigger the fallback.
+ *
+ * Calls after the first are no-ops until `resetBundledFallbackWarning()`
+ * clears the flag.
+ */
+export declare function warnBundledFallbackOnce(): void;
+/**
+ * Test-only: reset the warn-once flag between unit tests, so the next
+ * `warnBundledFallbackOnce()` call emits the advisory again.
+ */
+export declare function resetBundledFallbackWarning(): void;
+/**
+ * Decide whether jiti should alias `@sanity/ailf` → bundled-path for the given
+ * file. Returns the alias map or null.
+ *
+ * - User-local resolves → returns null (jiti's natural walk finds it).
+ * - User-local fails + bundled path available → returns alias map, fires
+ *   one-shot warning, logs at verbose level.
+ * - User-local fails + no bundled path → returns null (nothing we can do;
+ *   jiti will surface the original MODULE_NOT_FOUND).
+ *
+ * @param filePath Path of the user file about to be loaded; the local-install
+ *   probe starts from here.
+ * @returns A single-entry map keyed by `"@sanity/ailf"`, or null when no
+ *   alias should be registered.
+ */
+export declare function resolveAilfAlias(filePath: string): Record<string, string> | null;

package/dist/adapters/config-sources/ailf-resolver.js ADDED Viewed

@@ -0,0 +1,147 @@
+/**
+ * ailf-resolver.ts — locate `@sanity/ailf` for user TS files, with a bundled fallback.
+ *
+ * User `.ailf/*.ts` files import `defineTask` / `defineConfig` / `definePreset`
+ * from `@sanity/ailf`. In a fresh project with no local install, that bare
+ * specifier cannot resolve from the user's tree. To keep `ailf init` → `ailf
+ * pipeline` working out of the box we transparently fall back to the CLI's own
+ * copy of `@sanity/ailf` by registering a jiti module alias. A user-local
+ * install always wins — the fallback kicks in only when resolution fails.
+ *
+ * All jiti callsites across the eval package use `resolveAilfAlias()` to get
+ * a consistent resolution + warning story. Callers pass the returned map (or
+ * nothing) to `createJiti`.
+ */
+import { existsSync, readFileSync, statSync } from "node:fs";
+import { dirname, resolve as pathResolve } from "node:path";
+import { fileURLToPath } from "node:url";
+/**
+ * Walk up from this module's location to find the enclosing `@sanity/ailf`
+ * package root. Works in both dev (source under `packages/eval/src/`) and
+ * production (compiled under `packages/eval/dist/`) because both share the
+ * same package.json anchor.
+ *
+ * @returns {string | null} The nearest ancestor directory whose
+ *   `package.json` has `name === "@sanity/ailf"`, or null when none is found.
+ *
+ * The loop ends once `dir` is its own parent, so the filesystem root itself
+ * is never inspected.
+ */
+function findAilfPackageRoot() {
+    let dir = dirname(fileURLToPath(import.meta.url));
+    while (dir !== dirname(dir)) {
+        const pkgPath = pathResolve(dir, "package.json");
+        if (existsSync(pkgPath)) {
+            try {
+                const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
+                if (pkg.name === "@sanity/ailf")
+                    return dir;
+            }
+            catch {
+                /* ignore unreadable or malformed package.json and keep walking upward */
+            }
+        }
+        dir = dirname(dir);
+    }
+    return null;
+}
+/**
+ * Probe whether the user has `@sanity/ailf` installed as a local dependency
+ * reachable from the given path. Walks up the directory tree looking for a
+ * `node_modules/@sanity/ailf/package.json`. Returns the package entry point
+ * path on success, null otherwise.
+ *
+ * We intentionally do NOT use Node's `require.resolve` self-reference path:
+ * tsx and some bundler setups make it unreliable, and a self-reference
+ * would only match when the caller *is* the `@sanity/ailf` package (the
+ * monorepo devving case), which is semantically the same as having no
+ * install — the bundled fallback handles it.
+ *
+ * @param {string} fromPath File or directory to start from. Anything that is
+ *   not an existing directory (including a path that does not exist, or one
+ *   that cannot be stat'ed) is replaced by its parent directory first.
+ * @returns {string | null} Absolute path built from the package directory and
+ *   its `module` field, else `main`, else "index.js"; null when no install
+ *   is found.
+ *
+ * Details visible in the body:
+ *   - The package's `exports` map is not consulted, and the returned entry
+ *     file is not checked for existence.
+ *   - If the nearest matching `package.json` cannot be read or parsed, the
+ *     function returns null immediately instead of continuing upward.
+ *   - The walk stops before the filesystem root, so a `node_modules` directly
+ *     under the root is not probed.
+ */
+export function probeUserLocalAilf(fromPath) {
+    let dir;
+    try {
+        dir =
+            existsSync(fromPath) && statSync(fromPath).isDirectory()
+                ? fromPath
+                : dirname(fromPath);
+    }
+    catch {
+        dir = dirname(fromPath); // statSync threw: fall back to the parent directory
+    }
+    while (dir !== dirname(dir)) {
+        const pkgJson = pathResolve(dir, "node_modules", "@sanity", "ailf", "package.json");
+        if (existsSync(pkgJson)) {
+            try {
+                const pkg = JSON.parse(readFileSync(pkgJson, "utf-8"));
+                const entry = pkg.module ?? pkg.main ?? "index.js";
+                return pathResolve(dirname(pkgJson), entry);
+            }
+            catch {
+                return null; // unreadable/malformed manifest is reported as "not installed"
+            }
+        }
+        dir = dirname(dir);
+    }
+    return null;
+}
+/**
+ * Return the path to the CLI's own bundled copy of `@sanity/ailf`. Used as the
+ * fallback target when a user's project does not have it installed.
+ *
+ * We walk the filesystem rather than `require.resolve("@sanity/ailf")` because
+ * self-reference resolution is unreliable under tsx and some bundler setups.
+ * Returns null in exotic setups where no ancestor package.json matches.
+ *
+ * @returns {string | null} `<package root>/dist/index.js` when that file
+ *   exists, otherwise `<package root>/src/index.ts` when that exists,
+ *   otherwise null (also null when the package root cannot be found).
+ */
+export function getBundledAilfPath() {
+    const pkgRoot = findAilfPackageRoot();
+    if (!pkgRoot)
+        return null;
+    // Production layout: packages/eval/dist/index.js (checked first, so it wins when both exist)
+    const distEntry = pathResolve(pkgRoot, "dist", "index.js");
+    if (existsSync(distEntry))
+        return distEntry;
+    // Development layout (tsx on source): packages/eval/src/index.ts
+    const srcEntry = pathResolve(pkgRoot, "src", "index.ts");
+    if (existsSync(srcEntry))
+        return srcEntry;
+    return null;
+}
+let hasWarnedOnce = false; // module-scoped latch; set by warnBundledFallbackOnce, cleared by resetBundledFallbackWarning
+/**
+ * Emit a one-shot stderr advisory when the loader falls back to the bundled
+ * `@sanity/ailf`. The flag is module-scoped so a single pipeline run warns at
+ * most once, no matter how many TS files trigger the fallback.
+ *
+ * The flag is set before the write, and the message goes straight to
+ * `process.stderr` regardless of any log-level setting.
+ *
+ * @returns {void}
+ */
+export function warnBundledFallbackOnce() {
+    if (hasWarnedOnce)
+        return;
+    hasWarnedOnce = true;
+    process.stderr.write("  ⚠ @sanity/ailf is not installed in your project — using the CLI's bundled copy.\n" +
+        "    Pin it locally for reproducibility: npm install -D @sanity/ailf\n");
+}
+/**
+ * Test-only: reset the warn-once flag between unit tests, so the next
+ * `warnBundledFallbackOnce()` call emits the advisory again.
+ *
+ * @returns {void}
+ */
+export function resetBundledFallbackWarning() {
+    hasWarnedOnce = false;
+}
+/**
+ * Decide whether jiti should alias `@sanity/ailf` → bundled-path for the given
+ * file. Returns the alias map or null.
+ *
+ * - User-local resolves → returns null (jiti's natural walk finds it).
+ * - User-local fails + bundled path available → returns alias map, fires
+ *   one-shot warning, logs at verbose level.
+ * - User-local fails + no bundled path → returns null (nothing we can do;
+ *   jiti will surface the original MODULE_NOT_FOUND).
+ *
+ * Verbose lines are written to stderr only when the environment variable
+ * `AILF_LOG_LEVEL` is exactly "verbose"; the one-shot warning does not depend
+ * on that setting.
+ *
+ * @param {string} filePath Path of the user file about to be loaded; the
+ *   local-install probe starts from here.
+ * @returns {Record<string, string> | null} `{ "@sanity/ailf": <bundled path> }`
+ *   when the fallback applies, otherwise null.
+ */
+export function resolveAilfAlias(filePath) {
+    const userLocal = probeUserLocalAilf(filePath);
+    const verbose = process.env.AILF_LOG_LEVEL === "verbose";
+    if (userLocal) {
+        if (verbose) {
+            process.stderr.write(`  [ts-loader] ${filePath} → @sanity/ailf resolved locally at ${userLocal}\n`);
+        }
+        return null; // the probed path is only logged; resolution is left to jiti
+    }
+    const bundled = getBundledAilfPath();
+    if (!bundled)
+        return null;
+    if (verbose) {
+        process.stderr.write(`  [ts-loader] ${filePath} → @sanity/ailf not installed locally; using bundled copy at ${bundled}\n`);
+    }
+    warnBundledFallbackOnce();
+    return { "@sanity/ailf": bundled };
+}

package/dist/adapters/config-sources/ts-config-loader.js CHANGED Viewed

@@ -15,6 +15,7 @@
 import { existsSync } from "fs";
 import { pathToFileURL } from "node:url";
 import { createJiti } from "jiti";
+import { resolveAilfAlias } from "./ailf-resolver.js";
 // ---------------------------------------------------------------------------
 // jiti instance factory — resolves imports relative to the loaded file
 // ---------------------------------------------------------------------------
@@ -28,13 +29,19 @@ import { createJiti } from "jiti";
  *
  * We pass a `file://` URL (not a bare path) so jiti uses ESM resolution,
  * which matches the `"import"` condition in package.json exports maps.
+ *
+ * When the user's project cannot resolve `@sanity/ailf` (fresh directory
+ * without a local install), we register an alias pointing at the CLI's own
+ * bundled copy so the load still succeeds. See `ailf-resolver.ts`.
  */
 function createJitiForFile(filePath) {
+    const alias = resolveAilfAlias(filePath);
     return createJiti(pathToFileURL(filePath).href, {
         // Interop: handle both `export default` and `module.exports`
         interopDefault: true,
         // NOTE(review): this comment used to say "Don't require file extensions in imports", but `requireCache` looks like jiti's module-cache toggle — confirm against the installed jiti version
         requireCache: true,
+        ...(alias ? { alias } : {}), // `alias` is only passed when resolveAilfAlias returned a map
     });
 }
 /**

package/dist/adapters/task-sources/repo-schemas.d.ts CHANGED Viewed

@@ -19,15 +19,17 @@ import { z } from "zod";
 /**
  * The set of assertion types allowed in task files.
  *
- * This is a curated subset of Promptfoo assertion types — we expose only the
- * types that are stable, well-documented, and useful for external authors.
+ * Combines a curated subset of Promptfoo assertion types (stable, well-
+ * documented, useful for external authors) with the agent-harness-specific
+ * types mapped by `mode-handlers/agent-harness/assertions.ts`.
  */
-export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency"];
+export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency", "file-exists", "file-contains", "command-succeeds", "diff-matches"];
 export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
 /**
- * Valid rubric template names — must match keys in config/rubrics.yaml.
+ * Valid rubric template names — must match template keys in
+ * `packages/eval/config/rubrics.ts`.
  */
-export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage"];
+export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage", "mcp-input-validation", "mcp-output-correctness", "mcp-error-handling", "mcp-security", "factual-correctness", "completeness", "currency", "process-quality", "agent-output", "agent-tool-usage"];
 export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
 /**
  * Zod schema for a single task definition using canonical field names.
@@ -84,6 +86,16 @@ export declare const CanonicalTaskSchema: z.ZodObject<{
             "task-completion": "task-completion";
             "code-correctness": "code-correctness";
             "doc-coverage": "doc-coverage";
+            "mcp-input-validation": "mcp-input-validation";
+            "mcp-output-correctness": "mcp-output-correctness";
+            "mcp-error-handling": "mcp-error-handling";
+            "mcp-security": "mcp-security";
+            "factual-correctness": "factual-correctness";
+            completeness: "completeness";
+            currency: "currency";
+            "process-quality": "process-quality";
+            "agent-output": "agent-output";
+            "agent-tool-usage": "agent-tool-usage";
         }>;
         criteria: z.ZodArray<z.ZodString>;
         weight: z.ZodOptional<z.ZodNumber>;
@@ -101,6 +113,10 @@ export declare const CanonicalTaskSchema: z.ZodObject<{
             similar: "similar";
             cost: "cost";
             latency: "latency";
+            "file-exists": "file-exists";
+            "file-contains": "file-contains";
+            "command-succeeds": "command-succeeds";
+            "diff-matches": "diff-matches";
         }>;
         value: z.ZodOptional<z.ZodUnknown>;
         threshold: z.ZodOptional<z.ZodNumber>;
@@ -174,6 +190,16 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodObject<{
             "task-completion": "task-completion";
             "code-correctness": "code-correctness";
             "doc-coverage": "doc-coverage";
+            "mcp-input-validation": "mcp-input-validation";
+            "mcp-output-correctness": "mcp-output-correctness";
+            "mcp-error-handling": "mcp-error-handling";
+            "mcp-security": "mcp-security";
+            "factual-correctness": "factual-correctness";
+            completeness: "completeness";
+            currency: "currency";
+            "process-quality": "process-quality";
+            "agent-output": "agent-output";
+            "agent-tool-usage": "agent-tool-usage";
         }>;
         criteria: z.ZodArray<z.ZodString>;
         weight: z.ZodOptional<z.ZodNumber>;
@@ -191,6 +217,10 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodObject<{
             similar: "similar";
             cost: "cost";
             latency: "latency";
+            "file-exists": "file-exists";
+            "file-contains": "file-contains";
+            "command-succeeds": "command-succeeds";
+            "diff-matches": "diff-matches";
         }>;
         value: z.ZodOptional<z.ZodUnknown>;
         threshold: z.ZodOptional<z.ZodNumber>;

package/dist/adapters/task-sources/repo-schemas.js CHANGED Viewed

@@ -22,8 +22,9 @@ import { z } from "zod";
 /**
  * The set of assertion types allowed in task files.
  *
- * This is a curated subset of Promptfoo assertion types — we expose only the
- * types that are stable, well-documented, and useful for external authors.
+ * Combines a curated subset of Promptfoo assertion types (stable, well-
+ * documented, useful for external authors) with the agent-harness-specific
+ * types mapped by `mode-handlers/agent-harness/assertions.ts`.
  */
 export const CURATED_ASSERTION_TYPES = [
     "llm-rubric",
@@ -38,14 +39,35 @@ export const CURATED_ASSERTION_TYPES = [
     "similar",
     "cost",
     "latency",
+    // Agent-harness assertions — verify sandbox state after the agent runs.
+    // See src/pipeline/compiler/mode-handlers/agent-harness/assertions.ts
+    "file-exists",
+    "file-contains",
+    "command-succeeds",
+    "diff-matches",
 ];
 /**
- * Valid rubric template names — must match keys in config/rubrics.yaml.
+ * Valid rubric template names — must match template keys in
+ * `packages/eval/config/rubrics.ts`.
  */
 export const RUBRIC_TEMPLATE_NAMES = [
+    // Core literacy dimensions
     "task-completion",
     "code-correctness",
     "doc-coverage",
+    // MCP server dimensions
+    "mcp-input-validation",
+    "mcp-output-correctness",
+    "mcp-error-handling",
+    "mcp-security",
+    // Knowledge probe dimensions
+    "factual-correctness",
+    "completeness",
+    "currency",
+    // Agent harness dimensions
+    "process-quality",
+    "agent-output",
+    "agent-tool-usage",
 ];
 // ---------------------------------------------------------------------------
 // Doc ref schemas — polymorphic canonical doc references

package/dist/adapters/task-sources/task-file-loader.js CHANGED Viewed

@@ -25,6 +25,7 @@ import { existsSync, readdirSync } from "fs";
 import { pathToFileURL } from "node:url";
 import { resolve } from "path";
 import { createJiti } from "jiti";
+import { resolveAilfAlias } from "../config-sources/ailf-resolver.js";
 import { loadTsConfig } from "../config-sources/ts-config-loader.js";
 /**
  * Discover TS/JS task files in a directory.
@@ -72,9 +73,11 @@ export async function loadTsTaskFile(filePath) {
  * Needed by resolve-mappings.ts which is called from sync contexts.
  */
 export function loadTsTaskFileSync(filePath) {
+    const alias = resolveAilfAlias(filePath);
     const jiti = createJiti(pathToFileURL(filePath).href, {
         interopDefault: true,
         requireCache: true,
+        ...(alias ? { alias } : {}),
     });
     const mod = jiti(filePath);
     const value = mod && typeof mod === "object" && "default" in mod ? mod.default : mod;

package/dist/commands/init.js CHANGED Viewed

@@ -20,6 +20,7 @@ import { Command } from "commander";
 import { existsSync, mkdirSync, writeFileSync } from "fs";
 import { resolve, relative } from "path";
 import { ailfConfigData, ailfConfigYaml, ailfConfigTs, taskYamlFiles, taskTsFiles, TASK_FILE_NAMES, TASK_EXAMPLES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
+import { probeUserLocalAilf } from "../adapters/config-sources/ailf-resolver.js";
 // ---------------------------------------------------------------------------
 // Command factory
 // ---------------------------------------------------------------------------
@@ -82,6 +83,13 @@ async function runInit(opts) {
     console.log();
     console.log("  🚀 Initializing AI Literacy Framework");
     console.log();
+    if (format === "ts" && !probeUserLocalAilf(targetDir)) {
+        console.log("  ℹ @sanity/ailf is not installed in this project yet.");
+        console.log("    For reproducibility and IDE autocomplete, install it after init:");
+        console.log("      npm install -D @sanity/ailf   (or pnpm add -D, yarn add -D)");
+        console.log("    The pipeline will fall back to the CLI's bundled copy until you do.");
+        console.log();
+    }
     // 1. Create directories
     mkdirSync(tasksDir, { recursive: true });
     console.log(`  ✓ Created ${rel(targetDir, ailfDir)}/`);
@@ -252,7 +260,7 @@ async function runInit(opts) {
     console.log(`  2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/`);
     console.log("  3. Add a GitHub Actions secret");
     console.log("     (Settings → Secrets and variables → Actions):");
-    console.log("     • AILF_API_KEY — your API key (starts with ailf_live_sk_)");
+    console.log("     • AILF_API_KEY — your API key");
     console.log("  4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
     console.log("     automatically on PRs");
     if (format === "ts") {
@@ -268,9 +276,15 @@ async function runInit(opts) {
     console.log();
     console.log("     Not a Sanity employee? Request an API key from the AILF team.");
     console.log();
-    console.log("  💡 Test locally before pushing:");
+    console.log("  💡 Test a remote run (executes against the AILF API) before pushing:");
     console.log("     AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
     console.log();
+    console.log("  💡 Or test a remote run against your repo tasks:");
+    console.log("     AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --task-source=repo --debug");
+    console.log();
+    console.log("  💡 Or run locally against your repo tasks:");
+    console.log("     AILF_API_KEY=... npx @sanity/ailf@latest pipeline --mode=literacy --variant=full --task-source=repo --debug --explain -y");
+    console.log();
 }
 // ---------------------------------------------------------------------------
 // Custom preset scaffold template

package/dist/pipeline/compiler/config-loader.js CHANGED Viewed

@@ -25,6 +25,7 @@ import { createRequire } from "module";
 import { existsSync, readFileSync } from "fs";
 import { load } from "js-yaml";
 import { resolve } from "path";
+import { resolveAilfAlias } from "../../adapters/config-sources/ailf-resolver.js";
 /**
  * Load a config file by name, searching for TS/JS/YAML/JSON variants.
  *
@@ -134,7 +135,11 @@ function loadTsFile(filePath, format) {
     // jiti supports sync loading. Use createRequire for ESM compatibility.
     const esmRequire = createRequire(import.meta.url);
     const { createJiti } = esmRequire("jiti");
-    const jiti = createJiti(filePath, { interopDefault: true });
+    const alias = resolveAilfAlias(filePath);
+    const jiti = createJiti(filePath, {
+        interopDefault: true,
+        ...(alias ? { alias } : {}),
+    });
     const mod = jiti(filePath);
     const data = (mod?.default ?? mod);
     return { data, filePath, format };

package/dist/pipeline/compiler/preset-loader.js CHANGED Viewed

@@ -14,6 +14,7 @@ import { existsSync } from "fs";
 import { resolve } from "path";
 import { pathToFileURL } from "url";
 import { createJiti } from "jiti";
+import { resolveAilfAlias } from "../../adapters/config-sources/ailf-resolver.js";
 /** Thrown for preset-specific load errors (distinguishes from third-party errors) */
 class PresetLoadError extends Error {
     constructor(message) {
@@ -53,9 +54,11 @@ function loadSinglePreset(ref, rootDir) {
         }
     }
     try {
+        const alias = resolveAilfAlias(filePath);
         const jiti = createJiti(pathToFileURL(rootDir).href, {
             interopDefault: true,
             requireCache: true,
+            ...(alias ? { alias } : {}),
         });
         // jiti() is the synchronous loader
         const mod = jiti(filePath);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "3.3.0",
+  "version": "3.4.0",
   "private": false,
   "publishConfig": {
     "access": "public"