npm - @sanity/ailf - Versions diffs - 0.4.1 → 1.0.0 - Mend

@sanity/ailf 0.4.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290)

package/config/features.ts +23 -0
package/config/models.ts +83 -0
package/config/prompts.ts +16 -0
package/config/rubrics.ts +225 -0
package/config/schedules.ts +47 -0
package/config/sinks.ts +37 -0
package/config/sources.ts +21 -0
package/config/thresholds.ts +61 -0
package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
package/dist/_vendor/ailf-core/config-helpers.js +150 -0
package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
package/dist/_vendor/ailf-core/env-helper.js +45 -0
package/dist/_vendor/ailf-core/examples/index.d.ts +10 -10
package/dist/_vendor/ailf-core/examples/index.js +10 -10
package/dist/_vendor/ailf-core/index.d.ts +3 -0
package/dist/_vendor/ailf-core/index.js +5 -0
package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +32 -31
package/dist/_vendor/ailf-core/schemas/pipeline.js +52 -12
package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
package/dist/_vendor/ailf-core/services/index.js +2 -1
package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
package/dist/_vendor/ailf-core/services/scoring.js +25 -15
package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
package/dist/_vendor/ailf-core/types/index.js +8 -1
package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
package/dist/_vendor/ailf-core/types/trace.js +18 -0
package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
package/dist/_vendor/ailf-shared/index.d.ts +0 -1
package/dist/_vendor/ailf-shared/index.js +0 -1
package/dist/adapters/api-client/build-request.js +14 -13
package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
package/dist/adapters/config-sources/file-config-adapter.js +38 -12
package/dist/adapters/config-sources/index.d.ts +2 -0
package/dist/adapters/config-sources/index.js +1 -0
package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
package/dist/adapters/config-sources/ts-config-loader.js +133 -0
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
package/dist/adapters/task-sources/composite-task-source.js +1 -1
package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
package/dist/adapters/task-sources/index.d.ts +1 -0
package/dist/adapters/task-sources/index.js +1 -0
package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
package/dist/adapters/task-sources/repo-task-source.js +69 -16
package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
package/dist/adapters/task-sources/task-file-loader.js +83 -0
package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
package/dist/adapters/task-sources/yaml-task-source.js +19 -16
package/dist/cli.js +0 -2
package/dist/commands/baseline.js +4 -1
package/dist/commands/calculate-scores.js +1 -1
package/dist/commands/coverage-audit.js +7 -1
package/dist/commands/explain-handler.js +25 -23
package/dist/commands/fetch-docs.js +3 -2
package/dist/commands/generate-configs.js +1 -1
package/dist/commands/interactive.js +11 -7
package/dist/commands/pipeline-action.d.ts +2 -0
package/dist/commands/pipeline-action.js +16 -6
package/dist/commands/pipeline.d.ts +1 -0
package/dist/commands/pipeline.js +4 -2
package/dist/commands/pr-comment.js +1 -1
package/dist/commands/publish.js +2 -2
package/dist/commands/readiness-report.js +13 -6
package/dist/composition-root.d.ts +1 -1
package/dist/composition-root.js +67 -4
package/dist/orchestration/build-app-context.js +1 -0
package/dist/orchestration/build-step-sequence.js +24 -6
package/dist/orchestration/steps/calculate-scores-step.js +24 -11
package/dist/orchestration/steps/fetch-docs-step.js +6 -4
package/dist/orchestration/steps/gap-analysis-step.js +8 -7
package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
package/dist/orchestration/steps/generate-configs-step.js +245 -51
package/dist/orchestration/steps/grader-consistency-step.js +7 -4
package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
package/dist/orchestration/steps/readiness-step.js +5 -6
package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
package/dist/orchestration/steps/run-eval-step.js +8 -7
package/dist/pipeline/cache.d.ts +1 -1
package/dist/pipeline/cache.js +36 -8
package/dist/pipeline/calculate-scores.d.ts +5 -7
package/dist/pipeline/calculate-scores.js +74 -153
package/dist/pipeline/checks.js +2 -2
package/dist/pipeline/compare.js +8 -8
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
package/dist/pipeline/compiler/assertion-mapper.js +175 -0
package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
package/dist/pipeline/compiler/config-loader.d.ts +56 -0
package/dist/pipeline/compiler/config-loader.js +111 -0
package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
package/dist/pipeline/compiler/fixture-resolver.js +113 -0
package/dist/pipeline/compiler/hash.d.ts +11 -0
package/dist/pipeline/compiler/hash.js +18 -0
package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
package/dist/pipeline/compiler/ignore-fields.js +113 -0
package/dist/pipeline/compiler/index.d.ts +29 -0
package/dist/pipeline/compiler/index.js +45 -0
package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
package/dist/pipeline/compiler/literacy-bridge.js +172 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
package/dist/pipeline/compiler/presets/index.d.ts +9 -0
package/dist/pipeline/compiler/presets/index.js +8 -0
package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
package/dist/pipeline/compiler/provider-assembler.js +137 -0
package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
package/dist/pipeline/compiler/sandbox/index.js +11 -0
package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
package/dist/pipeline/compiler/scoring-bridge.js +114 -0
package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
package/dist/pipeline/compiler/task-graph-builder.js +291 -0
package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
package/dist/pipeline/compiler/telemetry/index.js +19 -0
package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
package/dist/pipeline/compiler/variable-resolver.js +115 -0
package/dist/pipeline/coverage-audit.d.ts +15 -5
package/dist/pipeline/coverage-audit.js +41 -22
package/dist/pipeline/eval-constants.d.ts +16 -6
package/dist/pipeline/eval-constants.js +25 -4
package/dist/pipeline/eval-fingerprint.d.ts +2 -2
package/dist/pipeline/eval-fingerprint.js +8 -9
package/dist/pipeline/expand-tasks.d.ts +23 -14
package/dist/pipeline/expand-tasks.js +37 -31
package/dist/pipeline/gap-analysis.d.ts +1 -1
package/dist/pipeline/gap-analysis.js +2 -2
package/dist/pipeline/generate-configs.d.ts +22 -4
package/dist/pipeline/generate-configs.js +53 -24
package/dist/pipeline/grader-api.d.ts +3 -3
package/dist/pipeline/grader-api.js +5 -12
package/dist/pipeline/grader-compare-runner.js +20 -27
package/dist/pipeline/grader-comparison.d.ts +4 -8
package/dist/pipeline/grader-comparison.js +11 -17
package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
package/dist/pipeline/grader-consistency-runner.js +18 -21
package/dist/pipeline/grader-consistency.d.ts +6 -10
package/dist/pipeline/grader-consistency.js +13 -32
package/dist/pipeline/grader-sensitivity-runner.js +7 -5
package/dist/pipeline/grader-sensitivity.d.ts +2 -6
package/dist/pipeline/grader-sensitivity.js +10 -10
package/dist/pipeline/grader-validate-runner.js +7 -5
package/dist/pipeline/grader-validation.d.ts +2 -6
package/dist/pipeline/grader-validation.js +14 -22
package/dist/pipeline/map-request-to-config.js +6 -1
package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
package/dist/pipeline/mirror-repo-tasks.js +16 -15
package/dist/pipeline/normalize-mode.d.ts +49 -0
package/dist/pipeline/normalize-mode.js +64 -0
package/dist/pipeline/plan.d.ts +5 -2
package/dist/pipeline/plan.js +134 -78
package/dist/pipeline/pr-comment.js +2 -0
package/dist/pipeline/profile-resolution.d.ts +47 -0
package/dist/pipeline/profile-resolution.js +91 -0
package/dist/pipeline/provenance.d.ts +2 -2
package/dist/pipeline/provenance.js +12 -17
package/dist/pipeline/release-report.js +4 -4
package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
package/dist/pipeline/repo-threshold-evaluator.js +1 -1
package/dist/pipeline/rubric-loader.d.ts +20 -0
package/dist/pipeline/rubric-loader.js +37 -0
package/dist/pipeline/validate.d.ts +4 -4
package/dist/pipeline/validate.js +64 -53
package/dist/schedules/loader.js +18 -8
package/dist/scripts/migrate-task-mode.d.ts +24 -0
package/dist/scripts/migrate-task-mode.js +85 -0
package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
package/dist/scripts/validate-task-sources.d.ts +1 -1
package/dist/scripts/validate-task-sources.js +15 -15
package/dist/sinks/loader.js +5 -7
package/dist/sources.d.ts +7 -7
package/dist/sources.js +22 -24
package/dist/webhook/dispatch.js +2 -1
package/package.json +6 -3
package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
package/tasks/literacy/frameworks.task.ts +128 -0
package/tasks/literacy/functions.task.ts +69 -0
package/tasks/literacy/groq.task.ts +258 -0
package/tasks/literacy/nextjs-live.task.ts +75 -0
package/tasks/literacy/studio-setup.task.ts +131 -0
package/tasks/literacy/visual-editing.task.ts +146 -0
package/config/features.yaml +0 -116
package/config/models.yaml +0 -116
package/config/prompts.yaml +0 -75
package/config/rubrics.yaml +0 -62
package/config/schedules.yaml +0 -43
package/config/sinks.yaml +0 -54
package/config/sources.yaml +0 -51
package/config/thresholds.yaml +0 -49
package/dist/agent-observer/test-imports.d.ts +0 -7
package/dist/agent-observer/test-imports.js +0 -185

package/dist/orchestration/steps/calculate-scores-step.js CHANGED Viewed

@@ -5,11 +5,11 @@
  * typed options derived from AppContext. No env bridge needed.
  */
 import { join } from "path";
-import { FULL_MODE_SUBMODES } from "../../_vendor/ailf-shared/index.js";
+import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
 import { getStepInputPaths } from "../../pipeline/cache.js";
 import { calculateAndWriteScores } from "../../pipeline/calculate-scores.js";
 import { checkResultsExist, checkScoreSummaryValid, } from "../../pipeline/checks.js";
-import { RESULTS_FILES } from "../../pipeline/eval-constants.js";
+import { resultsFileForMode } from "../../pipeline/eval-constants.js";
 import { loadSource } from "../../sources.js";
 import { configToSourceOverrides } from "../config-to-source-overrides.js";
 export class CalculateScoresStep {
@@ -23,10 +23,14 @@ export class CalculateScoresStep {
         // score-summary.json was already restored from the cached report.
         // Skip re-calculation — the raw eval-results files don't exist.
         if (state.remoteCacheHits?.size) {
-            const requiredModes = ctx.config.mode === "full"
-                ? [...FULL_MODE_SUBMODES]
-                : [ctx.config.mode];
-            const allCached = requiredModes.every((m) => state.remoteCacheHits.has(m));
+            // For literacy mode, determine required eval runs from variant
+            const variant = ctx.config.variant ?? LiteracyVariant.STANDARD;
+            const requiredRuns = ctx.config.mode === "literacy" && variant === LiteracyVariant.FULL
+                ? [LiteracyVariant.STANDARD, LiteracyVariant.AGENTIC]
+                : ctx.config.mode === "literacy"
+                    ? [variant]
+                    : [ctx.config.mode];
+            const allCached = requiredRuns.every((m) => state.remoteCacheHits.has(m));
             if (allCached) {
                 // Verify the restored score-summary.json is valid
                 const summaryIssues = checkScoreSummaryValid(ctx.config.rootDir);
@@ -40,10 +44,15 @@ export class CalculateScoresStep {
                 // If the summary is invalid, fall through to normal calculation
             }
         }
-        const primaryMode = ctx.config.mode === "full"
-            ? "baseline"
+        // Primary results file to score.
+        // For literacy: "full" variant uses baseline as primary; others use variant directly.
+        // For other modes: use the mode name.
+        const primaryResultsRun = ctx.config.mode === "literacy"
+            ? ctx.config.variant === LiteracyVariant.FULL
+                ? LiteracyVariant.STANDARD
+                : (ctx.config.variant ?? LiteracyVariant.STANDARD)
             : ctx.config.mode;
-        const resultsFile = RESULTS_FILES[primaryMode];
+        const resultsFile = resultsFileForMode(primaryResultsRun);
         // Precondition: results file exists
         const resultsIssues = checkResultsExist(ctx.config.rootDir, resultsFile);
         const resultsErrors = resultsIssues.filter((i) => i.severity === "error");
@@ -68,9 +77,13 @@ export class CalculateScoresStep {
             const result = calculateAndWriteScores({
                 allowedOrigins: ctx.config.allowedOrigins,
                 logger: ctx.logger,
-                mode: ctx.config.mode,
+                // Pass the variant for literacy (scoring uses it to decide
+                // whether to read agentic results), or mode for other modes
+                mode: ctx.config.mode === "literacy"
+                    ? (ctx.config.variant ?? LiteracyVariant.STANDARD)
+                    : ctx.config.mode,
                 resolvedSource,
-                resultsPath: primaryMode !== "baseline"
+                resultsPath: primaryResultsRun !== LiteracyVariant.STANDARD
                     ? join(ctx.config.rootDir, resultsFile)
                     : undefined,
                 rootDir: ctx.config.rootDir,

package/dist/orchestration/steps/fetch-docs-step.js CHANGED Viewed

@@ -28,8 +28,10 @@ export class FetchDocsStep {
         }
         const start = Date.now();
         // Precondition: at least one task has canonical doc mappings
-        const tasks = await ctx.taskSource.loadTasks(buildFilter(ctx));
-        const tasksWithDocs = tasks.filter((t) => t.canonicalDocs.length > 0);
+        const allTasks = await ctx.taskSource.loadTasks(buildFilter(ctx));
+        // Bridge: narrow to literacy tasks for canonical doc access
+        const literacyTasks = allTasks.filter((t) => t.mode === "literacy");
+        const tasksWithDocs = literacyTasks.filter((t) => (t.context?.docs?.length ?? 0) > 0);
         if (tasksWithDocs.length === 0) {
             return {
                 durationMs: Date.now() - start,
@@ -100,7 +102,7 @@ export class FetchDocsStep {
         if (resolvedSource.perspective &&
             releaseImpact &&
             !ctx.config.noAutoScope) {
-            const autoScope = computeAutoScope(tasks, releaseImpact, resolvedSource.perspective);
+            const autoScope = computeAutoScope(literacyTasks, releaseImpact, resolvedSource.perspective);
             if (autoScope) {
                 state.releaseAutoScope = autoScope;
                 logAutoScope(autoScope);
@@ -171,7 +173,7 @@ function writeMetadataFiles(rootDir, metadata) {
  */
 function extractSlugsFromTask(task) {
     const slugs = [];
-    for (const ref of task.canonicalDocs) {
+    for (const ref of task.context?.docs ?? []) {
         if (isSlugRef(ref)) {
             slugs.push(ref.slug);
         }

package/dist/orchestration/steps/gap-analysis-step.js CHANGED Viewed

@@ -89,7 +89,7 @@ export class GapAnalysisStep {
             const areaToDocRefs = new Map();
             let tasks = [];
             try {
-                tasks = await ctx.taskSource.loadTasks();
+                tasks = (await ctx.taskSource.loadTasks()).filter((t) => t.mode === "literacy");
             }
             catch {
                 // TaskSource may not be available in all contexts (e.g., standalone
@@ -99,17 +99,18 @@ export class GapAnalysisStep {
                 // Group tasks by feature area and build slug maps
                 const byArea = new Map();
                 for (const task of tasks) {
-                    const slugs = extractSlugsFromRefs(task.canonicalDocs);
+                    const slugs = extractSlugsFromRefs(task.context?.docs ?? []);
                     const refs = resolveRefs(slugs);
-                    // Map by description (what judgments use as taskId)
-                    descToDocRefs.set(task.description, refs);
+                    // Map by title (what judgments use as taskId)
+                    descToDocRefs.set(task.title, refs);
                     // Also map by task ID for prefix-based matching
                     descToDocRefs.set(task.id, refs);
                     // Group slugs by feature area
-                    if (!byArea.has(task.featureArea))
-                        byArea.set(task.featureArea, new Set());
+                    const area = task.area ?? "";
+                    if (!byArea.has(area))
+                        byArea.set(area, new Set());
                     for (const s of slugs)
-                        byArea.get(task.featureArea).add(s);
+                        byArea.get(area).add(s);
                 }
                 for (const [area, slugs] of byArea) {
                     areaToDocRefs.set(area, resolveRefs([...slugs]));

package/dist/orchestration/steps/generate-configs-step.d.ts CHANGED Viewed

@@ -1,14 +1,27 @@
 /**
  * Pipeline step: Generate Promptfoo configuration files.
  *
- * Calls generateConfigs() from pipeline/generate-configs.ts with typed options
- * derived from AppContext. No env bridge needed — source is resolved and
- * passed directly.
+ * ALL modes route through the Plugin Registry. The step looks up the mode
+ * handler via ctx.registry.getMode() and delegates compilation to it.
+ *
+ * Literacy mode has a variant strategy: baseline/agentic/observed/full.
+ * When the variant is "full", the handler is called twice (baseline + agentic)
+ * and three YAML files are written. Other modes produce one YAML file.
  */
 import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
 export declare class GenerateConfigsStep implements PipelineStep {
     readonly name = "generate-configs";
     check(ctx: AppContext): ValidationIssue[];
     execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
+    private compileLiteracyVariants;
+    private compileSingleMode;
+    private loadTasks;
+    private applyFilters;
+    /**
+     * Compile all tasks through a handler, merging results.
+     * For literacy mode, ctx can carry evalMode as an extension.
+     */
+    private compileAll;
+    private checkLiteracyPostconditions;
     cacheInputs(ctx: AppContext): string[];
 }

package/dist/orchestration/steps/generate-configs-step.js CHANGED Viewed

@@ -1,13 +1,16 @@
 /**
  * Pipeline step: Generate Promptfoo configuration files.
  *
- * Calls generateConfigs() from pipeline/generate-configs.ts with typed options
- * derived from AppContext. No env bridge needed — source is resolved and
- * passed directly.
+ * ALL modes route through the Plugin Registry. The step looks up the mode
+ * handler via ctx.registry.getMode() and delegates compilation to it.
+ *
+ * Literacy mode has a variant strategy: baseline/agentic/observed/full.
+ * When the variant is "full", the handler is called twice (baseline + agentic)
+ * and three YAML files are written. Other modes produce one YAML file.
  */
+import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
 import { getStepInputPaths } from "../../pipeline/cache.js";
 import { checkGeneratedConfigsExist } from "../../pipeline/checks.js";
-import { generateConfigs } from "../../pipeline/generate-configs.js";
 import { validateModelsYaml } from "../../pipeline/validate.js";
 import { loadSource } from "../../sources.js";
 import { configToSourceOverrides } from "../config-to-source-overrides.js";
@@ -19,68 +22,230 @@ export class GenerateConfigsStep {
     }
     async execute(ctx, state) {
         const start = Date.now();
-        // Resolve source once with typed overrides
-        const overrides = configToSourceOverrides(ctx.config);
-        const resolvedSource = ctx.config.source
-            ? loadSource(ctx.config.source, overrides)
-            : undefined;
-        // Load tasks via the TaskSource port — this picks up Content Lake,
-        // repo-based, and YAML tasks depending on which adapter is wired.
-        let tasks;
+        const mode = ctx.config.mode;
+        // Look up mode handler in the registry
+        const registration = ctx.registry.getMode(mode);
+        if (!registration) {
+            return {
+                durationMs: Date.now() - start,
+                error: `No handler registered for mode "${mode}". ` +
+                    `Available modes: ${ctx.registry
+                        .getModes()
+                        .map((m) => m.id)
+                        .join(", ")}`,
+                status: "failed",
+            };
+        }
         try {
-            const filter = ctx.config.areas || ctx.config.tasks || ctx.config.tags
-                ? {
-                    areas: ctx.config.areas,
-                    taskIds: ctx.config.tasks,
-                    tags: ctx.config.tags,
-                }
+            // Dynamically import the handler module
+            const handlerModule = await import(`../../pipeline/compiler/${registration.handlerModule}`);
+            const handler = handlerModule.handler;
+            if (!handler?.compileTask) {
+                return {
+                    durationMs: Date.now() - start,
+                    error: `Handler module for "${mode}" does not export a valid ModeHandler`,
+                    status: "failed",
+                };
+            }
+            // Load tasks
+            const tasks = await this.loadTasks(ctx, mode, state);
+            if (tasks.length === 0) {
+                return {
+                    durationMs: Date.now() - start,
+                    error: `No ${mode} tasks found. Create *.task.ts files in ` +
+                        `packages/eval/tasks/${mode}/`,
+                    status: "failed",
+                };
+            }
+            // Load models
+            const { loadModelsAndProviders } = await import("../../pipeline/compiler/provider-assembler.js");
+            const overrides = configToSourceOverrides(ctx.config);
+            const resolvedSource = ctx.config.source
+                ? loadSource(ctx.config.source, overrides)
                 : undefined;
-            tasks = await ctx.taskSource.loadTasks(filter);
+            const { models, providers } = loadModelsAndProviders(ctx.config.rootDir, resolvedSource, ctx.config.searchMode, ctx.config.allowedOrigins);
+            // Literacy mode: variant expansion (baseline + agentic → 3 YAML files)
+            if (mode === "literacy") {
+                return this.compileLiteracyVariants(ctx, handler, tasks, models, providers, start);
+            }
+            // All other modes: single compilation → single YAML file
+            return this.compileSingleMode(ctx, handler, tasks, mode, models, start);
         }
         catch (err) {
+            const msg = err instanceof Error ? err.message : String(err);
             return {
                 durationMs: Date.now() - start,
-                error: `TaskSource.loadTasks failed: ${err instanceof Error ? err.message : String(err)}`,
+                error: `${mode} compilation failed: ${msg}`,
                 status: "failed",
             };
         }
-        // Release auto-scope: narrow tasks to those affected by the release.
-        // When explicit area/task filters are also active, this produces the
-        // intersection (only tasks matching BOTH the explicit filter AND the
-        // release impact are included).
+    }
+    // ---------------------------------------------------------------------------
+    // Literacy variant compilation (baseline + agentic → 3 YAML files)
+    // ---------------------------------------------------------------------------
+    async compileLiteracyVariants(ctx, handler, tasks, models, providers, start) {
+        ctx.logger.info(`Compiling ${tasks.length} literacy task(s) via registry handler...`);
+        // Filter models per variant
+        const baselineModels = models.models
+            .filter((m) => !m.modes || m.modes.includes(LiteracyVariant.STANDARD))
+            .map((m) => ({
+            id: m.id,
+            label: m.label,
+        }));
+        const agenticModels = models.models
+            .filter((m) => !m.modes ||
+            m.modes.includes("agentic-naive") ||
+            m.modes.includes("agentic-optimized"))
+            .map((m) => ({
+            id: m.id,
+            label: m.label,
+        }));
+        // Load rubric config for template resolution
+        let rubricConfig;
+        try {
+            const { loadRubricTemplates } = await import("../../pipeline/rubric-loader.js");
+            rubricConfig = loadRubricTemplates(ctx.config.rootDir);
+        }
+        catch {
+            ctx.logger.warn("  ⚠ Could not load rubric config — templates will not resolve");
+        }
+        // Compile for each variant
+        const baselineResults = this.compileAll(handler, tasks, {
+            rootDir: ctx.config.rootDir,
+            graderProvider: models.grader.id,
+            models: baselineModels,
+            rubricConfig,
+            evalMode: LiteracyVariant.STANDARD,
+        });
+        const agenticResults = this.compileAll(handler, tasks, {
+            rootDir: ctx.config.rootDir,
+            graderProvider: models.grader.id,
+            models: agenticModels,
+            rubricConfig,
+            evalMode: LiteracyVariant.AGENTIC,
+        });
+        // Log warnings
+        for (const w of [...baselineResults.warnings, ...agenticResults.warnings]) {
+            ctx.logger.warn(`  ⚠ ${w}`);
+        }
+        ctx.logger.info(`  Compiled ${tasks.length} task(s) → ${baselineResults.tests.length} baseline + ${agenticResults.tests.length} agentic entries`);
+        // Write 3 YAML files via the literacy-specific writer
+        const { writeCompiledLiteracyConfigs } = await import("../../pipeline/compiler/compiler-to-yaml.js");
+        writeCompiledLiteracyConfigs(baselineResults, agenticResults, providers, {
+            rootDir: ctx.config.rootDir,
+            graderProvider: models.grader.id,
+            maxConcurrency: models.maxConcurrency,
+            logger: ctx.logger,
+        });
+        return this.checkLiteracyPostconditions(ctx, start);
+    }
+    // ---------------------------------------------------------------------------
+    // Single-mode compilation (all non-literacy modes)
+    // ---------------------------------------------------------------------------
+    async compileSingleMode(ctx, handler, tasks, mode, models, start) {
+        ctx.logger.info(`Compiling ${tasks.length} ${mode} task(s) via registry handler...`);
+        const merged = this.compileAll(handler, tasks, {
+            rootDir: ctx.config.rootDir,
+            graderProvider: models.grader.id,
+            models: models.models.map((m) => ({
+                id: m.id,
+                label: m.label,
+            })),
+        });
+        for (const w of merged.warnings) {
+            ctx.logger.warn(`  ⚠ ${w}`);
+        }
+        ctx.logger.info(`  Compiled ${tasks.length} task(s) → ${merged.tests.length} test entries`);
+        const { writeCompiledModeConfig } = await import("../../pipeline/compiler/compiler-to-yaml.js");
+        writeCompiledModeConfig(merged, mode, {
+            rootDir: ctx.config.rootDir,
+            graderProvider: models.grader.id,
+            maxConcurrency: models.maxConcurrency,
+            logger: ctx.logger,
+        });
+        return {
+            durationMs: Date.now() - start,
+            status: "success",
+            summary: `Generated promptfooconfig.${mode}.yaml`,
+        };
+    }
+    // ---------------------------------------------------------------------------
+    // Task loading — unified for all modes
+    // ---------------------------------------------------------------------------
+    async loadTasks(ctx, mode, state) {
+        const { resolve } = await import("path");
+        const { discoverTsTaskFiles, loadTsTaskFile } = await import("../../adapters/task-sources/task-file-loader.js");
+        const tasksDir = resolve(ctx.config.rootDir, "tasks", mode);
+        const files = discoverTsTaskFiles(tasksDir);
+        const tasks = [];
+        for (const file of files) {
+            const raw = await loadTsTaskFile(file);
+            for (const t of raw.tasks) {
+                const task = t;
+                // Filter to matching mode (skip tasks from other modes in same dir)
+                if (!("mode" in task) || task.mode === mode) {
+                    tasks.push(task);
+                }
+            }
+        }
+        // Apply area/task/tag filters
+        const filtered = this.applyFilters(ctx, tasks);
+        // Release auto-scope
         if (state.releaseAutoScope && !ctx.config.noAutoScope) {
             const scopedIds = new Set(state.releaseAutoScope.affectedTaskIds);
-            const beforeCount = tasks.length;
-            tasks = tasks.filter((t) => scopedIds.has(t.id));
-            console.log(`  🎯 Auto-scoped to ${tasks.length} of ${beforeCount} task(s) affected by release` +
-                ` (${beforeCount - tasks.length} skipped, --no-auto-scope to override)`);
+            const beforeCount = filtered.length;
+            const scoped = filtered.filter((t) => "id" in t && scopedIds.has(t.id));
+            ctx.logger.info(`  🎯 Auto-scoped to ${scoped.length} of ${beforeCount} task(s) affected by release`);
+            return scoped;
         }
-        try {
-            generateConfigs({
-                allowedOrigins: ctx.config.allowedOrigins,
-                filter: ctx.config.areas || ctx.config.tasks || ctx.config.tags
-                    ? {
-                        areas: ctx.config.areas,
-                        taskIds: ctx.config.tasks,
-                        tags: ctx.config.tags,
-                    }
-                    : undefined,
-                logger: ctx.logger,
-                resolvedSource,
-                rootDir: ctx.config.rootDir,
-                searchMode: ctx.config.searchMode,
-                source: ctx.config.source,
-                tasks,
+        return filtered;
+    }
+    applyFilters(ctx, tasks) {
+        let result = tasks;
+        if (ctx.config.areas?.length) {
+            const allowed = new Set(ctx.config.areas.map((a) => a.toLowerCase()));
+            result = result.filter((t) => {
+                const area = t.area?.toLowerCase();
+                return area && allowed.has(area);
             });
         }
-        catch (err) {
-            return {
-                durationMs: Date.now() - start,
-                error: `generate-configs failed: ${err instanceof Error ? err.message : String(err)}`,
-                status: "failed",
-            };
+        if (ctx.config.tasks?.length) {
+            const allowed = new Set(ctx.config.tasks);
+            result = result.filter((t) => {
+                const id = t.id;
+                return id && allowed.has(id);
+            });
         }
-        // Postcondition: config files exist
+        if (ctx.config.tags?.length) {
+            const allowed = new Set(ctx.config.tags);
+            result = result.filter((t) => {
+                const tags = t.tags;
+                return tags?.some((tag) => allowed.has(tag));
+            });
+        }
+        return result;
+    }
+    // ---------------------------------------------------------------------------
+    // Compilation helpers
+    // ---------------------------------------------------------------------------
+    /**
+     * Compile all tasks through a handler, merging results.
+     * For literacy mode, ctx can carry evalMode as an extension.
+     */
+    compileAll(handler, tasks, ctx) {
+        const results = [];
+        const warnings = [];
+        for (const task of tasks) {
+            const result = handler.compileTask(task, ctx);
+            results.push(result);
+            warnings.push(...result.warnings);
+        }
+        return mergeCompileResults(results);
+    }
+    // ---------------------------------------------------------------------------
+    // Postcondition checks
+    // ---------------------------------------------------------------------------
+    checkLiteracyPostconditions(ctx, start) {
         const configIssues = checkGeneratedConfigsExist(ctx.config.rootDir);
         const configErrors = configIssues.filter((i) => i.severity === "error");
         if (configErrors.length > 0) {
@@ -100,3 +265,32 @@ export class GenerateConfigsStep {
         return getStepInputPaths(ctx.config.rootDir, "generate-configs");
     }
 }
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+/**
+ * Merge multiple compile results into one.
+ *
+ * Note: `providers` and `prompts` are taken from the first result only.
+ * This is correct for single-mode compilation where all tasks share the
+ * same provider set. Cross-mode merging with per-task provider overrides
+ * would need deduplication here.
+ */
+function mergeCompileResults(results) {
+    const tests = results.flatMap((r) => r.tests);
+    const warnings = results.flatMap((r) => r.warnings);
+    const providers = results[0]?.providers ?? [];
+    const prompts = results[0]?.prompts ?? [];
+    const extras = {};
+    for (const r of results) {
+        if (r.extras)
+            Object.assign(extras, r.extras);
+    }
+    return {
+        providers,
+        tests,
+        prompts,
+        warnings,
+        ...(Object.keys(extras).length > 0 ? { extras } : {}),
+    };
+}

package/dist/orchestration/steps/grader-consistency-step.js CHANGED Viewed

@@ -6,8 +6,9 @@
  */
 import { existsSync } from "fs";
 import { resolve } from "path";
+import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
 import { checkResultsExist } from "../../pipeline/checks.js";
-import { RESULTS_FILES } from "../../pipeline/eval-constants.js";
+import { resultsFileForMode } from "../../pipeline/eval-constants.js";
 import { runGraderConsistency } from "../../pipeline/grader-consistency-runner.js";
 export class GraderConsistencyStep {
     name = "grader-consistency";
@@ -18,10 +19,12 @@ export class GraderConsistencyStep {
     async execute(ctx) {
         const start = Date.now();
         const replications = ctx.config.graderReplications ?? 5;
-        const concreteMode = ctx.config.mode === "full"
-            ? "baseline"
+        const primaryResultsRun = ctx.config.mode === "literacy"
+            ? ctx.config.variant === LiteracyVariant.FULL
+                ? LiteracyVariant.STANDARD
+                : (ctx.config.variant ?? LiteracyVariant.STANDARD)
             : ctx.config.mode;
-        const resultsFile = RESULTS_FILES[concreteMode];
+        const resultsFile = resultsFileForMode(primaryResultsRun);
         // Precondition: results file exists
         const resultsIssues = checkResultsExist(ctx.config.rootDir, resultsFile);
         const resultsErrors = resultsIssues.filter((i) => i.severity === "error");

package/dist/orchestration/steps/mirror-repo-tasks-step.js CHANGED Viewed

@@ -43,7 +43,7 @@ export class MirrorRepoTasksStep {
             // RepoTaskSource via a fresh instance.
             const { RepoTaskSource } = await import("../../adapters/task-sources/repo-task-source.js");
             const repoSource = new RepoTaskSource(ctx.config.repoTasksPath);
-            const repoTasks = await repoSource.loadTasks();
+            const repoTasks = (await repoSource.loadTasks()).filter((t) => t.mode === "literacy");
             if (repoTasks.length === 0) {
                 return {
                     durationMs: Date.now() - start,

package/dist/orchestration/steps/readiness-step.js CHANGED Viewed

@@ -6,7 +6,7 @@
  */
 import { existsSync, readFileSync, writeFileSync } from "fs";
 import { resolve } from "path";
-import { load } from "js-yaml";
+import { tryLoadConfigFile } from "../../pipeline/compiler/config-loader.js";
 import { formatReadinessMarkdown, generateReadinessReport, } from "../../pipeline/readiness-report.js";
 import { ThresholdConfigSchema } from "../../pipeline/schemas.js";
 export class ReadinessStep {
@@ -20,7 +20,6 @@ export class ReadinessStep {
         const start = Date.now();
         try {
             const scoreSummaryPath = resolve(root, "results", "latest", "score-summary.json");
-            const thresholdsPath = resolve(root, "config", "thresholds.yaml");
             if (!existsSync(scoreSummaryPath)) {
                 return {
                     durationMs: Date.now() - start,
@@ -28,16 +27,16 @@ export class ReadinessStep {
                     status: "failed",
                 };
             }
-            if (!existsSync(thresholdsPath)) {
+            const thresholdsLoaded = tryLoadConfigFile("thresholds", root);
+            if (!thresholdsLoaded) {
                 return {
                     durationMs: Date.now() - start,
-                    error: "config/thresholds.yaml not found",
+                    error: "config/thresholds not found",
                     status: "failed",
                 };
             }
             const scoreSummary = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
-            const rawThresholds = load(readFileSync(thresholdsPath, "utf-8"));
-            const thresholdConfig = ThresholdConfigSchema.parse(rawThresholds);
+            const thresholdConfig = ThresholdConfigSchema.parse(thresholdsLoaded.data);
             const gapPath = resolve(root, "results", "latest", "gap-analysis.json");
             const gapAnalysis = existsSync(gapPath)
                 ? JSON.parse(readFileSync(gapPath, "utf-8"))

package/dist/orchestration/steps/run-eval-step.d.ts CHANGED Viewed

@@ -5,12 +5,11 @@
  * invocation. Builds a clean env object for the subprocess instead of
  * polluting global process.env.
  */
-import type { ConcreteEvalMode } from "../../_vendor/ailf-shared/index.d.ts";
 import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
 export declare class RunEvalStep implements PipelineStep {
     private readonly mode;
     readonly name: string;
-    constructor(mode: ConcreteEvalMode);
+    constructor(mode: string);
     check(): ValidationIssue[];
     execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
     cacheInputs(ctx: AppContext): string[];

package/dist/orchestration/steps/run-eval-step.js CHANGED Viewed

@@ -10,7 +10,7 @@ import { resolve } from "path";
 import { getStepInputPaths } from "../../pipeline/cache.js";
 import { checkCanonicalContextsExist, checkGeneratedConfigsExist, checkResultsExist, } from "../../pipeline/checks.js";
 import { computeEvalFingerprint } from "../../pipeline/eval-fingerprint.js";
-import { buildFilterFlags, CONFIG_FILES, RESULTS_FILES, scanResultsForErrors, } from "../../pipeline/eval-constants.js";
+import { buildFilterFlags, configFileForMode, resultsFileForMode, scanResultsForErrors, } from "../../pipeline/eval-constants.js";
 export class RunEvalStep {
     mode;
     name;
@@ -59,7 +59,8 @@ export class RunEvalStep {
         // The generated Promptfoo config still includes their "without-docs"
         // variant (testing model knowledge alone), which doesn't need a
         // context file.
-        const tasksWithDocs = tasks.filter((t) => t.canonicalDocs.length > 0);
+        // Bridge: narrow to literacy tasks with docs
+        const tasksWithDocs = tasks.filter((t) => t.mode === "literacy" && (t.context?.docs?.length ?? 0) > 0);
         const taskIds = tasksWithDocs.map((t) => t.id);
         const contextIssues = checkCanonicalContextsExist(rootDir, taskIds);
         const contextErrors = contextIssues.filter((i) => i.severity === "error");
@@ -123,7 +124,7 @@ export class RunEvalStep {
         };
         // Only set env vars that differ from defaults — the subprocess inherits
         // process.env via PromptfooEvalAdapter's { ...process.env, ...config.env }
-        if (ctx.config.mode !== "baseline") {
+        if (ctx.config.mode !== "literacy") {
             subprocessEnv.EVAL_MODE = ctx.config.mode;
         }
         if (ctx.config.searchMode !== "open") {
@@ -135,7 +136,7 @@ export class RunEvalStep {
         // -----------------------------------------------------------------
         // Execute — use the EvalRunner port
         // -----------------------------------------------------------------
-        const configFile = CONFIG_FILES[this.mode];
+        const configFile = configFileForMode(this.mode);
         const filterFlags = buildFilterFlags(debug);
         const result = await ctx.evalRunner.run({
             concurrency,
@@ -145,7 +146,7 @@ export class RunEvalStep {
         });
         // Check if results were written despite non-zero exit
         if (result.status === "failed") {
-            const resultsExist = checkResultsExist(rootDir, RESULTS_FILES[this.mode]);
+            const resultsExist = checkResultsExist(rootDir, resultsFileForMode(this.mode));
             const hasResults = resultsExist.filter((i) => i.severity === "error").length === 0;
             if (!hasResults) {
                 return {
@@ -156,7 +157,7 @@ export class RunEvalStep {
             }
         }
         // Postcondition: results file exists
-        const resultsIssues = checkResultsExist(rootDir, RESULTS_FILES[this.mode]);
+        const resultsIssues = checkResultsExist(rootDir, resultsFileForMode(this.mode));
         const resultsErrors = resultsIssues.filter((i) => i.severity === "error");
         if (resultsErrors.length > 0) {
             return {
@@ -166,7 +167,7 @@ export class RunEvalStep {
             };
         }
         // Scan results for errors
-        const errorSummary = scanResultsForErrors(resolve(rootDir, RESULTS_FILES[this.mode]));
+        const errorSummary = scanResultsForErrors(resolve(rootDir, resultsFileForMode(this.mode)));
         if (errorSummary) {
             console.log();
             console.log(errorSummary);