npm - @sanity/ailf - Versions diffs - 0.5.0 → 2.0.0 - Mend

@sanity/ailf 0.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (377)

package/dist/pipeline/plan.js CHANGED Viewed

@@ -9,13 +9,15 @@
  *
  * @see docs/exec-plans/execution-preview.md
  */
-import { existsSync, readFileSync, readdirSync, statSync } from "fs";
+import { existsSync, readdirSync, statSync } from "fs";
 import { resolve } from "path";
-import { load } from "js-yaml";
 import { lookupPricing } from "../agent-observer/pricing.js";
 import { RepoTaskSource } from "../adapters/task-sources/repo-task-source.js";
+import { loadAllTsTaskFiles } from "../adapters/task-sources/task-file-loader.js";
 import { lookupCache } from "./cache.js";
-import { loadAndExpandTasks } from "./expand-tasks.js";
+import { compileLiteracyTasks } from "./compiler/literacy-bridge.js";
+import { tryLoadConfigFile } from "./compiler/config-loader.js";
+import { LiteracyVariant } from "./normalize-mode.js";
 import { validateConfiguration } from "./validate.js";
 /**
  * Known promptfoo provider prefixes — stripped to get the raw model name.
@@ -39,38 +41,37 @@ function extractModelName(id) {
     return parts.length > 1 ? parts.slice(1).join(":") : id;
 }
 function loadModelsFile(rootDir) {
-    const modelsPath = resolve(rootDir, "config", "models.yaml");
-    if (!existsSync(modelsPath))
-        return null;
-    try {
-        const raw = readFileSync(modelsPath, "utf-8");
-        return load(raw);
-    }
-    catch {
-        return null;
-    }
+    const result = tryLoadConfigFile("models", rootDir);
+    return result?.data ?? null;
 }
 /**
- * Map eval mode to the model "modes" array values from models.yaml.
- * Baseline mode maps to "baseline"; agentic maps to both naive and optimized.
+ * Map eval mode + variant to the model "modes" array values from models config.
+ *
+ * Literacy mode uses the variant to determine which model sub-modes match.
+ * Non-literacy modes accept all models by default (filtering is done
+ * elsewhere for those modes).
  */
-function modeMatchesModelModes(mode, modelModes) {
+function modeMatchesModelModes(mode, modelModes, variant) {
     if (!modelModes || modelModes.length === 0)
         return true;
-    switch (mode) {
-        case "agentic":
-            return (modelModes.includes("agentic-naive") ||
-                modelModes.includes("agentic-optimized"));
-        case "baseline":
-            return modelModes.includes("baseline");
-        case "full":
-            // Full mode uses all models — a model matches if it's in any sub-mode
-            return (modelModes.includes("baseline") ||
-                modelModes.includes("agentic-naive") ||
-                modelModes.includes("agentic-optimized"));
-        case "observed":
-            return modelModes.includes("observed");
+    if (mode === "literacy") {
+        switch (variant) {
+            case LiteracyVariant.AGENTIC:
+                return (modelModes.includes("agentic-naive") ||
+                    modelModes.includes("agentic-optimized"));
+            case LiteracyVariant.OBSERVED:
+                return modelModes.includes(LiteracyVariant.OBSERVED);
+            case LiteracyVariant.FULL:
+                return (modelModes.includes(LiteracyVariant.STANDARD) ||
+                    modelModes.includes("agentic-naive") ||
+                    modelModes.includes("agentic-optimized"));
+            case LiteracyVariant.STANDARD:
+            default:
+                return modelModes.includes(LiteracyVariant.STANDARD);
+        }
     }
+    // Non-literacy modes accept all models by default
+    return true;
 }
 // ---------------------------------------------------------------------------
 // Cost estimation
@@ -131,46 +132,103 @@ export async function buildPipelinePlan(opts, rootDir) {
     let totalTests = 0;
     let tasks = [];
     let repoTaskCount;
+    // -----------------------------------------------------------------------
+    // Load and compile tasks — unified path for all modes
+    // -----------------------------------------------------------------------
     try {
-        const { entries } = loadAndExpandTasks(rootDir, filter);
-        totalTests = entries.length;
-        tasks = entries.map((entry) => {
-            const desc = typeof entry.description === "string" ? entry.description : "(unknown)";
-            const isBaseline = desc.includes("[Baseline]") ||
-                desc.endsWith("(baseline)") ||
-                (Array.isArray(entry.prompts) && entry.prompts.includes("without-docs"));
-            return {
-                description: desc,
-                variant: isBaseline ? "baseline" : "gold",
-            };
-        });
+        const modelsForCompile = loadModelsFile(rootDir);
+        const graderProvider = modelsForCompile?.grader?.id ?? "openai:chat:gpt-4o";
+        const modelEntries = (modelsForCompile?.models ?? []).map((m) => ({ id: m.id, label: m.label }));
+        // Load *.task.ts files from tasks/<mode>/
+        const modeTasksDir = resolve(rootDir, "tasks", opts.mode);
+        if (existsSync(modeTasksDir)) {
+            const rawTasks = await loadAllTsTaskFiles(modeTasksDir);
+            if (rawTasks.length > 0) {
+                // Dynamic import of the handler module
+                const handlerModulePath = `./compiler/mode-handlers/${opts.mode}/index.js`;
+                const mod = await import(handlerModulePath);
+                const handler = mod.handler;
+                for (const rawFile of rawTasks) {
+                    for (const taskDef of rawFile.tasks) {
+                        const task = taskDef;
+                        // Apply area/task/tag filter
+                        if (filter) {
+                            if (filter.areas?.length &&
+                                !filter.areas
+                                    .map((a) => a.toLowerCase())
+                                    .includes((task.area ?? "").toLowerCase()))
+                                continue;
+                            if (filter.taskIds?.length && !filter.taskIds.includes(task.id))
+                                continue;
+                            if (filter.tags?.length &&
+                                (!task.tags || !task.tags.some((t) => filter.tags.includes(t))))
+                                continue;
+                        }
+                        const result = handler.compileTask(task, {
+                            rootDir,
+                            graderProvider,
+                            models: modelEntries,
+                            // For literacy mode, pass the variant as evalMode
+                            ...(opts.mode === "literacy"
+                                ? {
+                                    evalMode: opts.variant === LiteracyVariant.AGENTIC
+                                        ? LiteracyVariant.AGENTIC
+                                        : LiteracyVariant.STANDARD,
+                                }
+                                : {}),
+                        });
+                        totalTests += result.tests.length;
+                        for (const test of result.tests) {
+                            const desc = typeof test.description === "string"
+                                ? test.description
+                                : (taskDef.id ?? "unknown");
+                            const isBaseline = desc.includes("[Baseline]") || desc.endsWith("(baseline)");
+                            tasks.push({
+                                description: desc,
+                                variant: isBaseline
+                                    ? LiteracyVariant.STANDARD
+                                    : "gold",
+                            });
+                        }
+                    }
+                }
+            }
+        }
     }
-    catch {
-        errors.push("Failed to expand tasks — check task YAML files");
+    catch (err) {
+        const detail = err instanceof Error ? err.message : String(err);
+        errors.push(`Failed to compile tasks: ${detail}`);
     }
     // Scan repo tasks path for additional task count (preview only)
     if (opts.repoTasksPath) {
         try {
             const repoSource = new RepoTaskSource(opts.repoTasksPath);
-            const repoTasks = await repoSource.loadTasks(filter);
+            // Type-narrow to literacy tasks — compileLiteracyTasks accepts LiteracyTaskDefinition[]
+            const repoTasks = (await repoSource.loadTasks(filter)).filter((t) => t.mode === "literacy");
             repoTaskCount = repoTasks.length;
             if (repoTaskCount > 0) {
-                // Expand repo tasks to estimate test entries
-                const { expandTaskDefinitions } = await import("./expand-tasks.js");
-                const { entries: repoEntries } = expandTaskDefinitions(repoTasks, rootDir, opts.mode === "agentic" ? "agentic" : "baseline");
-                totalTests += repoEntries.length;
-                for (const entry of repoEntries) {
-                    const desc = typeof entry.description === "string"
-                        ? entry.description
-                        : "(unknown)";
-                    const isBaseline = desc.includes("[Baseline]") ||
-                        desc.endsWith("(baseline)") ||
-                        (Array.isArray(entry.prompts) &&
-                            entry.prompts.includes("without-docs"));
-                    tasks.push({
-                        description: desc,
-                        variant: isBaseline ? "baseline" : "gold",
-                    });
+                const modelsForCompile = loadModelsFile(rootDir);
+                const graderProvider = modelsForCompile?.grader?.id ?? "openai:chat:gpt-4o";
+                const compileResult = compileLiteracyTasks(repoTasks, {
+                    rootDir,
+                    evalMode: opts.variant === LiteracyVariant.AGENTIC
+                        ? LiteracyVariant.AGENTIC
+                        : LiteracyVariant.STANDARD,
+                    graderProvider,
+                    models: (modelsForCompile?.models ?? []).map((m) => ({ id: m.id, label: m.label })),
+                });
+                totalTests += compileResult.totalTests;
+                for (const { taskId, result } of compileResult.tasks) {
+                    for (const test of result.tests) {
+                        const desc = typeof test.description === "string" ? test.description : taskId;
+                        const isBaseline = desc.includes("[Baseline]") || desc.endsWith("(baseline)");
+                        tasks.push({
+                            description: desc,
+                            variant: isBaseline
+                                ? LiteracyVariant.STANDARD
+                                : "gold",
+                        });
+                    }
                 }
             }
         }
@@ -186,11 +244,11 @@ export async function buildPipelinePlan(opts, rootDir) {
     const models = [];
     let graderModelName = "";
     if (modelsFile) {
-        const activeModels = modelsFile.models.filter((m) => modeMatchesModelModes(opts.mode, m.modes));
+        const activeModels = modelsFile.models.filter((m) => modeMatchesModelModes(opts.mode, m.modes, opts.variant));
         // For agentic mode, each model appears twice (naive + optimized)
         for (const m of activeModels) {
             const modelName = extractModelName(m.id);
-            if (opts.mode === "agentic") {
+            if (opts.variant === LiteracyVariant.AGENTIC) {
                 if (m.modes?.includes("agentic-naive")) {
                     models.push({
                         id: m.id,
@@ -518,16 +576,16 @@ function collectFilesCreated(opts) {
 // ---------------------------------------------------------------------------
 function collectFilesRead(rootDir, _mode) {
     const files = [
-        "config/models.yaml",
-        "config/rubrics.yaml",
-        "config/prompts.yaml",
-        "config/sources.yaml",
+        "config/models.ts",
+        "config/rubrics.ts",
+        "config/prompts.ts",
+        "config/sources.ts",
     ];
     // Task files
     const tasksDir = resolve(rootDir, "tasks");
     if (existsSync(tasksDir)) {
         const taskFiles = readdirSync(tasksDir)
-            .filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."))
+            .filter((f) => /\.(yaml|yml|task\.ts|task\.js)$/.test(f) && !f.startsWith("."))
             .sort();
         for (const f of taskFiles)
             files.push(`tasks/${f}`);
@@ -551,11 +609,11 @@ function collectFilesRead(rootDir, _mode) {
             files.push(`canonical/reference-solutions/${f}`);
     }
     // Thresholds (if readiness is involved)
-    if (existsSync(resolve(rootDir, "config", "thresholds.yaml"))) {
-        files.push("config/thresholds.yaml");
+    if (existsSync(resolve(rootDir, "config", "thresholds.ts"))) {
+        files.push("config/thresholds.ts");
     }
-    if (existsSync(resolve(rootDir, "config", "features.yaml"))) {
-        files.push("config/features.yaml");
+    if (existsSync(resolve(rootDir, "config", "features.ts"))) {
+        files.push("config/features.ts");
     }
     return [...new Set(files)].sort();
 }
@@ -616,16 +674,14 @@ function estimateCost(testCount, models, graderModelName, rubricAssertionsPerTas
 // Used by the plan builder without importing the full type to avoid circular deps.
 // ---------------------------------------------------------------------------
 function estimateRubricAssertionsPerTask(rootDir) {
-    // Load rubrics.yaml and count the default template set.
+    // Load rubrics config and count the default template set.
     // In practice, most tasks have 2-4 rubric assertions.
-    const rubricsPath = resolve(rootDir, "config", "rubrics.yaml");
-    if (!existsSync(rubricsPath))
+    const result = tryLoadConfigFile("rubrics", rootDir);
+    if (!result)
         return 2; // conservative default
     try {
-        const raw = readFileSync(rubricsPath, "utf-8");
-        const data = load(raw);
-        const templateCount = data?.templates
-            ? Object.keys(data.templates).length
+        const templateCount = result.data?.templates
+            ? Object.keys(result.data.templates).length
             : 2;
         // Most tasks use 2-3 of the available templates
         return Math.min(templateCount, 3);

package/dist/pipeline/pr-comment.js CHANGED Viewed

@@ -320,6 +320,8 @@ function generateComment(summary, options = {}) {
                         ? "📉"
                         : "➡️";
                 const d = (n) => n > 0 ? `+${Math.round(n)}` : String(Math.round(n));
+                // TODO(multi-mode): These dimension keys are literacy-specific.
+                // For other modes, iterate Object.entries(a.dimensions) dynamically.
                 lines.push(`| ${a.area} | ${a.baseline} | ${a.experiment} | ${icon} ${d(a.delta)} | ${d(a.dimensions.taskCompletion.delta)} | ${d(a.dimensions.codeCorrectness.delta)} | ${d(a.dimensions.docCoverage.delta)} |`);
             }
         }

package/dist/pipeline/profile-resolution.d.ts CHANGED Viewed

@@ -1,13 +1,16 @@
 /**
  * pipeline/profile-resolution.ts
  *
- * Resolves the correct weight profile for a given (mode, variant) pair.
- * The scoring engine calls this to determine which dimensions and weights
- * apply to each test entry's composite score.
+ * Resolves the correct weight profile for a given (mode, perspective, variant)
+ * tuple. The scoring engine calls this to determine which dimensions and
+ * weights apply to each test entry's composite score.
  *
  * Resolution order:
- *   1. Explicit binding: mode-profiles.<mode>.<variant> → profile name
- *   2. Fallback: the "default" profile
+ *   1. Nested binding (variant provided):
+ *      mode-profiles.<mode>.<variant>.<perspective> → profile name
+ *   2. Flat binding (no variant):
+ *      mode-profiles.<mode>.<perspective> → profile name
+ *   3. Fallback: the "default" profile
  *
  * Supports both the new `profiles` format and the legacy flat `weights`
  * format (treated as a single profile named "default").
@@ -23,17 +26,22 @@ import type { RubricConfig, WeightProfile } from "../_vendor/ailf-core/index.d.t
  */
 export declare function resolveProfiles(config: RubricConfig): Record<string, WeightProfile>;
 /**
- * Resolve the weight profile for a specific (mode, variant) pair.
+ * Resolve the weight profile for a specific (mode, perspective, variant) tuple.
  *
- * @param mode    - Evaluation mode (e.g., "baseline", "agentic", "agent-task")
- * @param variant - Entry variant: "gold" (with docs) or "baseline" (without docs)
- * @param config  - Parsed rubrics.yaml config
+ * @param mode        - Canonical mode (e.g., "literacy", "mcp-server")
+ * @param perspective - Entry perspective: "gold" (with docs) or "baseline" (without docs)
+ * @param config      - Parsed rubrics config
+ * @param variant     - Optional variant within the mode (e.g., "baseline", "agentic" for literacy)
  * @returns The resolved weight profile (dimension → weight map)
  *
  * @example
- *   resolveProfile("baseline", "gold", config)     // → default profile
- *   resolveProfile("baseline", "baseline", config)  // → output-only profile
- *   resolveProfile("agentic", "gold", config)       // → default profile
- *   resolveProfile("unknown-mode", "gold", config)  // → default (fallback)
+ *   // Nested: literacy mode with variant sub-keys
+ *   resolveProfile("literacy", "gold", config, "baseline")     // → default profile
+ *   resolveProfile("literacy", "baseline", config, "baseline")  // → output-only profile
+ *   resolveProfile("literacy", "gold", config, "agentic")       // → default profile
+ *
+ *   // Flat: non-literacy modes
+ *   resolveProfile("mcp-server", "gold", config)                // → mcp-behavior profile
+ *   resolveProfile("unknown-mode", "gold", config)              // → default (fallback)
  */
-export declare function resolveProfile(mode: string, variant: string, config: RubricConfig): WeightProfile;
+export declare function resolveProfile(mode: string, perspective: string, config: RubricConfig, variant?: string): WeightProfile;

package/dist/pipeline/profile-resolution.js CHANGED Viewed

@@ -1,13 +1,16 @@
 /**
  * pipeline/profile-resolution.ts
  *
- * Resolves the correct weight profile for a given (mode, variant) pair.
- * The scoring engine calls this to determine which dimensions and weights
- * apply to each test entry's composite score.
+ * Resolves the correct weight profile for a given (mode, perspective, variant)
+ * tuple. The scoring engine calls this to determine which dimensions and
+ * weights apply to each test entry's composite score.
  *
  * Resolution order:
- *   1. Explicit binding: mode-profiles.<mode>.<variant> → profile name
- *   2. Fallback: the "default" profile
+ *   1. Nested binding (variant provided):
+ *      mode-profiles.<mode>.<variant>.<perspective> → profile name
+ *   2. Flat binding (no variant):
+ *      mode-profiles.<mode>.<perspective> → profile name
+ *   3. Fallback: the "default" profile
  *
  * Supports both the new `profiles` format and the legacy flat `weights`
  * format (treated as a single profile named "default").
@@ -29,31 +32,50 @@ export function resolveProfiles(config) {
         return { default: config.weights };
     }
     // Schema validation should prevent this, but be defensive
-    throw new Error("rubrics.yaml has neither 'profiles' nor 'weights' — cannot resolve scoring profiles");
+    throw new Error("rubrics config has neither 'profiles' nor 'weights' — cannot resolve scoring profiles");
 }
 /**
- * Resolve the weight profile for a specific (mode, variant) pair.
+ * Resolve the weight profile for a specific (mode, perspective, variant) tuple.
  *
- * @param mode    - Evaluation mode (e.g., "baseline", "agentic", "agent-task")
- * @param variant - Entry variant: "gold" (with docs) or "baseline" (without docs)
- * @param config  - Parsed rubrics.yaml config
+ * @param mode        - Canonical mode (e.g., "literacy", "mcp-server")
+ * @param perspective - Entry perspective: "gold" (with docs) or "baseline" (without docs)
+ * @param config      - Parsed rubrics config
+ * @param variant     - Optional variant within the mode (e.g., "baseline", "agentic" for literacy)
  * @returns The resolved weight profile (dimension → weight map)
  *
  * @example
- *   resolveProfile("baseline", "gold", config)     // → default profile
- *   resolveProfile("baseline", "baseline", config)  // → output-only profile
- *   resolveProfile("agentic", "gold", config)       // → default profile
- *   resolveProfile("unknown-mode", "gold", config)  // → default (fallback)
+ *   // Nested: literacy mode with variant sub-keys
+ *   resolveProfile("literacy", "gold", config, "baseline")     // → default profile
+ *   resolveProfile("literacy", "baseline", config, "baseline")  // → output-only profile
+ *   resolveProfile("literacy", "gold", config, "agentic")       // → default profile
+ *
+ *   // Flat: non-literacy modes
+ *   resolveProfile("mcp-server", "gold", config)                // → mcp-behavior profile
+ *   resolveProfile("unknown-mode", "gold", config)              // → default (fallback)
  */
-export function resolveProfile(mode, variant, config) {
+export function resolveProfile(mode, perspective, config, variant) {
     const profiles = resolveProfiles(config);
     const modeProfiles = config["mode-profiles"];
-    // Look up explicit binding: mode-profiles.<mode>.<variant> → profile name
-    const profileName = modeProfiles?.[mode]?.[variant];
+    const modeEntry = modeProfiles?.[mode];
+    let profileName;
+    if (modeEntry && variant) {
+        // Nested lookup: mode-profiles.<mode>.<variant>.<perspective>
+        const variantEntry = modeEntry[variant];
+        if (typeof variantEntry === "object" && variantEntry !== null) {
+            profileName = variantEntry[perspective];
+        }
+    }
+    if (!profileName && modeEntry) {
+        // Flat lookup: mode-profiles.<mode>.<perspective>
+        const directEntry = modeEntry[perspective];
+        if (typeof directEntry === "string") {
+            profileName = directEntry;
+        }
+    }
     if (profileName) {
         const profile = profiles[profileName];
         if (!profile) {
-            throw new Error(`mode-profiles.${mode}.${variant} references profile "${profileName}" ` +
+            throw new Error(`mode-profiles.${mode}.${variant ? variant + "." : ""}${perspective} references profile "${profileName}" ` +
                 `which does not exist. Available profiles: ${Object.keys(profiles).join(", ")}`);
         }
         return profile;
@@ -61,7 +83,7 @@ export function resolveProfile(mode, variant, config) {
     // Fall back to "default" profile
     const defaultProfile = profiles["default"];
     if (!defaultProfile) {
-        throw new Error(`No scoring profile found for mode="${mode}" variant="${variant}" ` +
+        throw new Error(`No scoring profile found for mode="${mode}" perspective="${perspective}" ` +
             `and no "default" profile exists. ` +
             `Available profiles: ${Object.keys(profiles).join(", ")}`);
     }

package/dist/pipeline/provenance.d.ts CHANGED Viewed

@@ -42,7 +42,7 @@ export interface ProvenanceInput {
     promptfooUrl?: string;
     /** Per-mode Promptfoo share URLs */
     promptfooUrls?: PromptfooUrlEntry[];
-    /** Path to the package root (for reading models.yaml) */
+    /** Path to the package root (for reading config/models) */
     rootDir: string;
     /** Report ID that triggered this re-run (becomes lineage.rerunOf) */
     sourceReportId?: string;
@@ -58,7 +58,7 @@ export interface ProvenanceInput {
  *
  * Assembles provenance from:
  * - Pipeline options (mode, source, areas, tasks)
- * - config/models.yaml (model list, grader)
+ * - config/models.ts (model list, grader)
  * - Environment variables (CI metadata, trigger detection)
  * - Optional metadata (context hash, Promptfoo URL)
  */

package/dist/pipeline/provenance.js CHANGED Viewed

@@ -11,16 +11,14 @@
  * @see docs/design-docs/report-store/domain-model.md
  * @see docs/design-docs/report-store/architecture.md — Provenance collection
  */
-import { readFileSync } from "fs";
-import { resolve } from "path";
-import { load } from "js-yaml";
 import { ConsoleLogger } from "../adapters/loggers/index.js";
+import { tryLoadConfigFile } from "./compiler/config-loader.js";
 /**
  * Build a ReportProvenance object from pipeline context.
  *
  * Assembles provenance from:
  * - Pipeline options (mode, source, areas, tasks)
- * - config/models.yaml (model list, grader)
+ * - config/models.ts (model list, grader)
  * - Environment variables (CI metadata, trigger detection)
  * - Optional metadata (context hash, Promptfoo URL)
  */
@@ -168,20 +166,17 @@ function detectTrigger() {
 // Model config loading
 // ---------------------------------------------------------------------------
 /**
- * Load config/models.yaml to extract model list and grader info.
+ * Load config/models to extract model list and grader info.
  * Falls back to a minimal config if the file can't be read.
  */
 function loadModelsConfig(rootDir, log) {
-    try {
-        const content = readFileSync(resolve(rootDir, "config", "models.yaml"), "utf-8");
-        return load(content);
-    }
-    catch {
-        log.warn("Could not read config/models.yaml for provenance");
-        return {
-            defaults: {},
-            grader: { id: "unknown" },
-            models: [],
-        };
-    }
+    const result = tryLoadConfigFile("models", rootDir);
+    if (result)
+        return result.data;
+    log.warn("Could not read config/models for provenance");
+    return {
+        defaults: {},
+        grader: { id: "unknown" },
+        models: [],
+    };
 }

package/dist/pipeline/release-report.js CHANGED Viewed

@@ -133,8 +133,8 @@ export function formatReleaseImpactConsole(report) {
                 const docs = task.attributedDocs.length > 0
                     ? task.attributedDocs.join(", ")
                     : "(unattributed)";
-                const deltaStr = task.delta >= 0 ? `+${task.delta.toFixed(1)}` : task.delta.toFixed(1);
-                lines.push(`  ${docs.padEnd(32)} | ${area.area.padEnd(16)} | ${task.taskId.padEnd(23)} | ${deltaStr}`);
+                const taskDeltaStr = task.delta >= 0 ? `+${task.delta.toFixed(1)}` : task.delta.toFixed(1);
+                lines.push(`  ${docs.padEnd(32)} | ${area.area.padEnd(16)} | ${task.taskId.padEnd(23)} | ${taskDeltaStr}`);
             }
         }
         lines.push("");
@@ -194,9 +194,9 @@ export function formatReleaseImpactMarkdown(report) {
                 const docs = task.attributedDocs.length > 0
                     ? task.attributedDocs.map((d) => `\`${d}\``).join(", ")
                     : "—";
-                const deltaStr = task.delta >= 0 ? `+${task.delta.toFixed(1)}` : task.delta.toFixed(1);
+                const taskDeltaStr = task.delta >= 0 ? `+${task.delta.toFixed(1)}` : task.delta.toFixed(1);
                 const regressIcon = area.regressed ? " ⚠️" : "";
-                lines.push(`| ${docs} | ${area.area} | ${task.taskId} | ${deltaStr}${regressIcon} |`);
+                lines.push(`| ${docs} | ${area.area} | ${task.taskId} | ${taskDeltaStr}${regressIcon} |`);
             }
         }
         lines.push("");

package/dist/pipeline/repo-threshold-evaluator.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * definitions (`.ailf/tasks/*.yaml` → `execution.threshold`).
  *
  * This is distinct from the readiness-gate threshold system in
- * `config/thresholds.yaml`. Repo thresholds are per-task, defined by
+ * `config/thresholds`. Repo thresholds are per-task, defined by
  * the product team, and drive PR check pass/fail status. Framework
  * thresholds are per-area, defined by the AILF team, and drive
  * readiness reports.

package/dist/pipeline/repo-threshold-evaluator.js CHANGED Viewed

@@ -5,7 +5,7 @@
  * definitions (`.ailf/tasks/*.yaml` → `execution.threshold`).
  *
  * This is distinct from the readiness-gate threshold system in
- * `config/thresholds.yaml`. Repo thresholds are per-task, defined by
+ * `config/thresholds`. Repo thresholds are per-task, defined by
  * the product team, and drive PR check pass/fail status. Framework
  * thresholds are per-area, defined by the AILF team, and drive
  * readiness reports.

package/dist/pipeline/rubric-loader.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+/**
+ * pipeline/rubric-loader.ts — Load and validate rubric config.
+ *
+ * Extracted from the legacy expand-tasks.ts so that callers (e.g.,
+ * calculate-scores.ts) can load rubric templates without pulling in
+ * the deprecated task expansion machinery.
+ *
+ * @see packages/eval/config/rubrics.ts — the rubric configuration
+ * @see packages/core/src/schemas/pipeline.ts — RubricConfigSchema
+ */
+import { type RubricConfig } from "../_vendor/ailf-core/index.d.ts";
+/**
+ * Load and validate config/rubrics from the given root directory.
+ * Caches the result for subsequent calls with the same rootDir.
+ */
+export declare function loadRubricTemplates(rootDir: string): RubricConfig;
+/**
+ * Reset the rubric config cache. Useful in tests.
+ */
+export declare function resetRubricCache(): void;

package/dist/pipeline/rubric-loader.js ADDED Viewed

@@ -0,0 +1,37 @@
+/**
+ * pipeline/rubric-loader.ts — Load and validate rubric config.
+ *
+ * Extracted from the legacy expand-tasks.ts so that callers (e.g.,
+ * calculate-scores.ts) can load rubric templates without pulling in
+ * the deprecated task expansion machinery.
+ *
+ * @see packages/eval/config/rubrics.ts — the rubric configuration
+ * @see packages/core/src/schemas/pipeline.ts — RubricConfigSchema
+ */
+import { RubricConfigSchema } from "../_vendor/ailf-core/index.js";
+import { loadConfigFile } from "./compiler/config-loader.js";
+let cachedRubricConfig = null;
+/**
+ * Load and validate config/rubrics from the given root directory.
+ * Caches the result for subsequent calls with the same rootDir.
+ */
+export function loadRubricTemplates(rootDir) {
+    if (cachedRubricConfig)
+        return cachedRubricConfig;
+    const { data } = loadConfigFile("rubrics", rootDir);
+    const result = RubricConfigSchema.safeParse(data);
+    if (!result.success) {
+        const messages = result.error.issues
+            .map((i) => `  [${i.path.join(".")}]: ${i.message}`)
+            .join("\n");
+        throw new Error(`Invalid config/rubrics:\n${messages}`);
+    }
+    cachedRubricConfig = result.data;
+    return result.data;
+}
+/**
+ * Reset the rubric config cache. Useful in tests.
+ */
+export function resetRubricCache() {
+    cachedRubricConfig = null;
+}