npm - @sanity/ailf - Versions diffs - 0.4.1 → 1.0.0 - Mend

@sanity/ailf 0.4.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290)

package/config/features.ts +23 -0
package/config/models.ts +83 -0
package/config/prompts.ts +16 -0
package/config/rubrics.ts +225 -0
package/config/schedules.ts +47 -0
package/config/sinks.ts +37 -0
package/config/sources.ts +21 -0
package/config/thresholds.ts +61 -0
package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
package/dist/_vendor/ailf-core/config-helpers.js +150 -0
package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
package/dist/_vendor/ailf-core/env-helper.js +45 -0
package/dist/_vendor/ailf-core/examples/index.d.ts +10 -10
package/dist/_vendor/ailf-core/examples/index.js +10 -10
package/dist/_vendor/ailf-core/index.d.ts +3 -0
package/dist/_vendor/ailf-core/index.js +5 -0
package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +32 -31
package/dist/_vendor/ailf-core/schemas/pipeline.js +52 -12
package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
package/dist/_vendor/ailf-core/services/index.js +2 -1
package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
package/dist/_vendor/ailf-core/services/scoring.js +25 -15
package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
package/dist/_vendor/ailf-core/types/index.js +8 -1
package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
package/dist/_vendor/ailf-core/types/trace.js +18 -0
package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
package/dist/_vendor/ailf-shared/index.d.ts +0 -1
package/dist/_vendor/ailf-shared/index.js +0 -1
package/dist/adapters/api-client/build-request.js +14 -13
package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
package/dist/adapters/config-sources/file-config-adapter.js +38 -12
package/dist/adapters/config-sources/index.d.ts +2 -0
package/dist/adapters/config-sources/index.js +1 -0
package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
package/dist/adapters/config-sources/ts-config-loader.js +133 -0
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
package/dist/adapters/task-sources/composite-task-source.js +1 -1
package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
package/dist/adapters/task-sources/index.d.ts +1 -0
package/dist/adapters/task-sources/index.js +1 -0
package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
package/dist/adapters/task-sources/repo-task-source.js +69 -16
package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
package/dist/adapters/task-sources/task-file-loader.js +83 -0
package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
package/dist/adapters/task-sources/yaml-task-source.js +19 -16
package/dist/cli.js +0 -2
package/dist/commands/baseline.js +4 -1
package/dist/commands/calculate-scores.js +1 -1
package/dist/commands/coverage-audit.js +7 -1
package/dist/commands/explain-handler.js +25 -23
package/dist/commands/fetch-docs.js +3 -2
package/dist/commands/generate-configs.js +1 -1
package/dist/commands/interactive.js +11 -7
package/dist/commands/pipeline-action.d.ts +2 -0
package/dist/commands/pipeline-action.js +16 -6
package/dist/commands/pipeline.d.ts +1 -0
package/dist/commands/pipeline.js +4 -2
package/dist/commands/pr-comment.js +1 -1
package/dist/commands/publish.js +2 -2
package/dist/commands/readiness-report.js +13 -6
package/dist/composition-root.d.ts +1 -1
package/dist/composition-root.js +67 -4
package/dist/orchestration/build-app-context.js +1 -0
package/dist/orchestration/build-step-sequence.js +24 -6
package/dist/orchestration/steps/calculate-scores-step.js +24 -11
package/dist/orchestration/steps/fetch-docs-step.js +6 -4
package/dist/orchestration/steps/gap-analysis-step.js +8 -7
package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
package/dist/orchestration/steps/generate-configs-step.js +245 -51
package/dist/orchestration/steps/grader-consistency-step.js +7 -4
package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
package/dist/orchestration/steps/readiness-step.js +5 -6
package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
package/dist/orchestration/steps/run-eval-step.js +8 -7
package/dist/pipeline/cache.d.ts +1 -1
package/dist/pipeline/cache.js +36 -8
package/dist/pipeline/calculate-scores.d.ts +5 -7
package/dist/pipeline/calculate-scores.js +74 -153
package/dist/pipeline/checks.js +2 -2
package/dist/pipeline/compare.js +8 -8
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
package/dist/pipeline/compiler/assertion-mapper.js +175 -0
package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
package/dist/pipeline/compiler/config-loader.d.ts +56 -0
package/dist/pipeline/compiler/config-loader.js +111 -0
package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
package/dist/pipeline/compiler/fixture-resolver.js +113 -0
package/dist/pipeline/compiler/hash.d.ts +11 -0
package/dist/pipeline/compiler/hash.js +18 -0
package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
package/dist/pipeline/compiler/ignore-fields.js +113 -0
package/dist/pipeline/compiler/index.d.ts +29 -0
package/dist/pipeline/compiler/index.js +45 -0
package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
package/dist/pipeline/compiler/literacy-bridge.js +172 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
package/dist/pipeline/compiler/presets/index.d.ts +9 -0
package/dist/pipeline/compiler/presets/index.js +8 -0
package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
package/dist/pipeline/compiler/provider-assembler.js +137 -0
package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
package/dist/pipeline/compiler/sandbox/index.js +11 -0
package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
package/dist/pipeline/compiler/scoring-bridge.js +114 -0
package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
package/dist/pipeline/compiler/task-graph-builder.js +291 -0
package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
package/dist/pipeline/compiler/telemetry/index.js +19 -0
package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
package/dist/pipeline/compiler/variable-resolver.js +115 -0
package/dist/pipeline/coverage-audit.d.ts +15 -5
package/dist/pipeline/coverage-audit.js +41 -22
package/dist/pipeline/eval-constants.d.ts +16 -6
package/dist/pipeline/eval-constants.js +25 -4
package/dist/pipeline/eval-fingerprint.d.ts +2 -2
package/dist/pipeline/eval-fingerprint.js +8 -9
package/dist/pipeline/expand-tasks.d.ts +23 -14
package/dist/pipeline/expand-tasks.js +37 -31
package/dist/pipeline/gap-analysis.d.ts +1 -1
package/dist/pipeline/gap-analysis.js +2 -2
package/dist/pipeline/generate-configs.d.ts +22 -4
package/dist/pipeline/generate-configs.js +53 -24
package/dist/pipeline/grader-api.d.ts +3 -3
package/dist/pipeline/grader-api.js +5 -12
package/dist/pipeline/grader-compare-runner.js +20 -27
package/dist/pipeline/grader-comparison.d.ts +4 -8
package/dist/pipeline/grader-comparison.js +11 -17
package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
package/dist/pipeline/grader-consistency-runner.js +18 -21
package/dist/pipeline/grader-consistency.d.ts +6 -10
package/dist/pipeline/grader-consistency.js +13 -32
package/dist/pipeline/grader-sensitivity-runner.js +7 -5
package/dist/pipeline/grader-sensitivity.d.ts +2 -6
package/dist/pipeline/grader-sensitivity.js +10 -10
package/dist/pipeline/grader-validate-runner.js +7 -5
package/dist/pipeline/grader-validation.d.ts +2 -6
package/dist/pipeline/grader-validation.js +14 -22
package/dist/pipeline/map-request-to-config.js +6 -1
package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
package/dist/pipeline/mirror-repo-tasks.js +16 -15
package/dist/pipeline/normalize-mode.d.ts +49 -0
package/dist/pipeline/normalize-mode.js +64 -0
package/dist/pipeline/plan.d.ts +5 -2
package/dist/pipeline/plan.js +134 -78
package/dist/pipeline/pr-comment.js +2 -0
package/dist/pipeline/profile-resolution.d.ts +47 -0
package/dist/pipeline/profile-resolution.js +91 -0
package/dist/pipeline/provenance.d.ts +2 -2
package/dist/pipeline/provenance.js +12 -17
package/dist/pipeline/release-report.js +4 -4
package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
package/dist/pipeline/repo-threshold-evaluator.js +1 -1
package/dist/pipeline/rubric-loader.d.ts +20 -0
package/dist/pipeline/rubric-loader.js +37 -0
package/dist/pipeline/validate.d.ts +4 -4
package/dist/pipeline/validate.js +64 -53
package/dist/schedules/loader.js +18 -8
package/dist/scripts/migrate-task-mode.d.ts +24 -0
package/dist/scripts/migrate-task-mode.js +85 -0
package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
package/dist/scripts/validate-task-sources.d.ts +1 -1
package/dist/scripts/validate-task-sources.js +15 -15
package/dist/sinks/loader.js +5 -7
package/dist/sources.d.ts +7 -7
package/dist/sources.js +22 -24
package/dist/webhook/dispatch.js +2 -1
package/package.json +6 -3
package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
package/tasks/literacy/frameworks.task.ts +128 -0
package/tasks/literacy/functions.task.ts +69 -0
package/tasks/literacy/groq.task.ts +258 -0
package/tasks/literacy/nextjs-live.task.ts +75 -0
package/tasks/literacy/studio-setup.task.ts +131 -0
package/tasks/literacy/visual-editing.task.ts +146 -0
package/config/features.yaml +0 -116
package/config/models.yaml +0 -116
package/config/prompts.yaml +0 -75
package/config/rubrics.yaml +0 -62
package/config/schedules.yaml +0 -43
package/config/sinks.yaml +0 -54
package/config/sources.yaml +0 -51
package/config/thresholds.yaml +0 -49
package/dist/agent-observer/test-imports.d.ts +0 -7
package/dist/agent-observer/test-imports.js +0 -185

package/dist/pipeline/coverage-audit.js CHANGED Viewed

@@ -2,17 +2,15 @@
  * coverage-audit.ts
  *
  * Pure computation functions for cross-referencing the product feature registry
- * (config/features.yaml) against actual task files (tasks/*.yaml)
+ * (config/features) against actual task files (tasks/*.yaml)
  * to produce a documentation coverage audit.
  *
  * Phase 3c of the Scenario Matrix implementation.
  *
  * @see docs/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
  */
-import { existsSync, readFileSync } from "fs";
-import { join } from "path";
-import { load } from "js-yaml";
 import { ConsoleLogger } from "../adapters/loggers/index.js";
+import { tryLoadConfigFile } from "./compiler/config-loader.js";
 import { FeatureRegistrySchema } from "./schemas.js";
 import { resolveMappings } from "./resolve-mappings.js";
 // ---------------------------------------------------------------------------
@@ -112,31 +110,52 @@ export function formatCoverageMarkdown(report) {
 // Formatting
 // ---------------------------------------------------------------------------
 /**
- * Load and validate the feature registry from config/features.yaml.
+ * Load and validate the feature registry.
+ *
+ * Resolution order:
+ *   1. config/features file (user overrides) — if non-empty, wins
+ *   2. Registry features (preset-provided) — fallback when config is empty
+ *   3. null — no features available
  */
-export function loadFeatureRegistry(rootDir, logger) {
-    const log = logger ?? new ConsoleLogger();
-    const filePath = join(rootDir, "config", "features.yaml");
-    if (!existsSync(filePath)) {
-        return null;
-    }
-    const raw = readFileSync(filePath, "utf-8");
-    const parsed = load(raw);
-    const result = FeatureRegistrySchema.safeParse(parsed);
-    if (!result.success) {
-        log.error("❌ config/features.yaml validation failed:");
-        for (const issue of result.error.issues) {
-            log.error(`  ${issue.path.join(".")}: ${issue.message}`);
+export function loadFeatureRegistry(rootDir, options) {
+    const log = options?.logger ?? new ConsoleLogger();
+    // Priority 1: config file (user overrides)
+    const loaded = tryLoadConfigFile("features", rootDir);
+    if (loaded) {
+        // Check for intentionally empty config (stub for preset override).
+        // The Zod schema requires .min(1), so an empty features array would
+        // fail validation — but that's the expected state when the preset
+        // provides features and the config file is just an override point.
+        const raw = loaded.data;
+        if (Array.isArray(raw?.features) && raw.features.length === 0) {
+            // Empty config — fall through to registry
         }
-        return null;
+        else {
+            const result = FeatureRegistrySchema.safeParse(loaded.data);
+            if (!result.success) {
+                log.error("❌ config/features validation failed:");
+                for (const issue of result.error.issues) {
+                    log.error(`  ${issue.path.join(".")}: ${issue.message}`);
+                }
+                return null;
+            }
+            return result.data.features;
+        }
+    }
+    // Priority 2: registry (preset-provided features)
+    const registryFeatures = options?.registry?.getFeatureDefs();
+    if (registryFeatures && registryFeatures.features.length > 0) {
+        return registryFeatures.features;
     }
-    return result.data.features;
+    return null;
 }
 /**
  * Run the coverage audit and produce a structured report.
  */
-export function runCoverageAudit(rootDir) {
-    const features = loadFeatureRegistry(rootDir);
+export function runCoverageAudit(rootDir, options) {
+    const features = loadFeatureRegistry(rootDir, {
+        registry: options?.registry,
+    });
     if (!features)
         return null;
     const taskCounts = countTasksByArea(rootDir);

package/dist/pipeline/eval-constants.d.ts CHANGED Viewed

@@ -5,11 +5,21 @@
  * files can be deleted while tests and other modules retain access
  * to these shared definitions.
  */
-import type { ConcreteEvalMode, DebugOptions, FilterOptions, StepResult } from "../_vendor/ailf-core/index.d.ts";
-/** Promptfoo config file per concrete eval mode */
-export declare const CONFIG_FILES: Record<ConcreteEvalMode, string>;
-/** Each mode writes eval results to a different file (set in the config's outputPath) */
-export declare const RESULTS_FILES: Record<ConcreteEvalMode, string>;
+import type { DebugOptions, FilterOptions, StepResult } from "../_vendor/ailf-core/index.d.ts";
+/**
+ * Get the Promptfoo config file path for a given mode.
+ *
+ * Literacy variants use legacy naming for backward compatibility.
+ * All other modes use the pattern: `promptfooconfig.<mode>.yaml`
+ */
+export declare function configFileForMode(mode: string): string;
+/**
+ * Get the results file path for a given mode.
+ *
+ * Literacy variants use legacy naming for backward compatibility.
+ * All other modes use: `results/latest/eval-results-<mode>.json`
+ */
+export declare function resultsFileForMode(mode: string): string;
 /** Extended step result that carries cache metadata for downstream steps */
 export interface EvalStepResult {
     /** The computed eval fingerprint (for publishing in provenance) */
@@ -25,7 +35,7 @@ export interface RemoteCacheOptions {
     debug?: boolean;
     /** Filter options used for fingerprint computation */
     filter?: FilterOptions;
-    /** Grader model identifier from models.yaml */
+    /** Grader model identifier from models config */
     graderModel: string;
     /** Disable remote cache lookup (--no-remote-cache) */
     noRemoteCache?: boolean;

package/dist/pipeline/eval-constants.js CHANGED Viewed

@@ -6,18 +6,39 @@
  * to these shared definitions.
  */
 import { existsSync, readFileSync } from "fs";
-/** Promptfoo config file per concrete eval mode */
-export const CONFIG_FILES = {
+// ---------------------------------------------------------------------------
+// Config / results file naming — unified across all modes
+// ---------------------------------------------------------------------------
+/** Literacy variant names that map to legacy config file naming */
+const LITERACY_CONFIG_FILES = {
     agentic: "promptfooconfig.agentic.yaml",
     baseline: "promptfooconfig.yaml",
     observed: "promptfooconfig.observed.yaml",
 };
-/** Each mode writes eval results to a different file (set in the config's outputPath) */
-export const RESULTS_FILES = {
+/** Literacy variant names that map to legacy results file naming */
+const LITERACY_RESULTS_FILES = {
     agentic: "results/latest/eval-results-agentic.json",
     baseline: "results/latest/eval-results.json",
     observed: "results/latest/eval-results-observed.json",
 };
+/**
+ * Get the Promptfoo config file path for a given mode.
+ *
+ * Literacy variants use legacy naming for backward compatibility.
+ * All other modes use the pattern: `promptfooconfig.<mode>.yaml`
+ */
+export function configFileForMode(mode) {
+    return LITERACY_CONFIG_FILES[mode] ?? `promptfooconfig.${mode}.yaml`;
+}
+/**
+ * Get the results file path for a given mode.
+ *
+ * Literacy variants use legacy naming for backward compatibility.
+ * All other modes use: `results/latest/eval-results-<mode>.json`
+ */
+export function resultsFileForMode(mode) {
+    return (LITERACY_RESULTS_FILES[mode] ?? `results/latest/eval-results-${mode}.json`);
+}
 /**
  * Build promptfoo filter flags from debug options.
  */

package/dist/pipeline/eval-fingerprint.d.ts CHANGED Viewed

@@ -43,9 +43,9 @@ export interface FingerprintInput {
  * This is similar to `getStepInputPaths()` in `cache.ts` but is more
  * comprehensive and explicitly designed for cross-environment cache keys:
  *
- * - Includes `config/prompts.yaml` and `config/rubrics.yaml` directly
+ * - Includes `config/prompts` and `config/rubrics` directly
  *   (the local cache only includes them indirectly via generated configs)
- * - Includes `config/models.yaml` (model configuration)
+ * - Includes `config/models` (model configuration)
  * - Includes task definitions and reference solutions
  * - Includes the actual documentation content (contexts/canonical/*.md)
  * - Respects filter flags to only include relevant files

package/dist/pipeline/eval-fingerprint.js CHANGED Viewed

@@ -43,9 +43,9 @@ const FINGERPRINT_VERSION = "eval-fingerprint-v1";
  * This is similar to `getStepInputPaths()` in `cache.ts` but is more
  * comprehensive and explicitly designed for cross-environment cache keys:
  *
- * - Includes `config/prompts.yaml` and `config/rubrics.yaml` directly
+ * - Includes `config/prompts` and `config/rubrics` directly
  *   (the local cache only includes them indirectly via generated configs)
- * - Includes `config/models.yaml` (model configuration)
+ * - Includes `config/models` (model configuration)
  * - Includes task definitions and reference solutions
  * - Includes the actual documentation content (contexts/canonical/*.md)
  * - Respects filter flags to only include relevant files
@@ -56,11 +56,10 @@ export function collectFingerprintInputPaths(rootDir, filter) {
     // -----------------------------------------------------------------------
     // Config files — always included
     // -----------------------------------------------------------------------
-    const configFiles = [
-        "config/models.yaml",
-        "config/prompts.yaml",
-        "config/rubrics.yaml",
-    ];
+    // Check all supported extensions in priority order
+    const configNames = ["models", "prompts", "rubrics"];
+    const configExts = [".ts", ".js", ".yaml", ".yml", ".json"];
+    const configFiles = configNames.flatMap((name) => configExts.map((ext) => `config/${name}${ext}`));
     for (const f of configFiles) {
         const p = r(f);
         if (existsSync(p))
@@ -72,12 +71,12 @@ export function collectFingerprintInputPaths(rootDir, filter) {
     const tasksDir = r("tasks");
     if (existsSync(tasksDir)) {
         const taskFiles = readdirSync(tasksDir)
-            .filter((f) => f.endsWith(".yaml") || f.endsWith(".yml"))
+            .filter((f) => /\.(yaml|yml|task\.ts|task\.js)$/.test(f))
             .filter((f) => !f.startsWith(".")); // exclude .expanded.yaml
         for (const f of taskFiles) {
             // If area filter is set, only include matching task files
             if (filter?.areas && filter.areas.length > 0) {
-                const stem = f.replace(/\.ya?ml$/, "");
+                const stem = f.replace(/\.(yaml|yml|task\.ts|task\.js)$/, "");
                 if (!filter.areas.includes(stem))
                     continue;
             }

package/dist/pipeline/expand-tasks.d.ts CHANGED Viewed

@@ -1,6 +1,15 @@
 /**
  * pipeline/expand-tasks.ts
  *
+ * @deprecated This is part of the LEGACY compilation path. New code should
+ * use the literacy handler in the compiler pipeline instead:
+ *
+ *   import { compileLiteracyTask } from "./compiler/mode-handlers/literacy-handler.js"
+ *
+ * @see packages/eval/src/pipeline/compiler/mode-handlers/literacy-handler.ts
+ *
+ * ---
+ *
  * Reads task YAML files in the single-definition format and expands each
  * task into gold + baseline Promptfoo test entries. This eliminates the
  * manual duplication where every task had to be written twice.
@@ -34,15 +43,15 @@
  *         value: ["client.fetch", "createClient"]
  *     baseline:
  *       enabled: true
- *       rubric: abbreviated
+ *       rubric: full
  *
  * Expands to:
  *   1. Gold entry — uses vars.docs as-is, resolves templates, appends doc-coverage
- *   2. Baseline entry — sets docs: "", adds transform, uses abbreviated rubric
+ *   2. Baseline entry — sets docs: "", uses full rubric (same assertions as gold)
  */
-import type { TaskDefinition } from "../_vendor/ailf-core/index.d.ts";
-import type { Logger } from "../_vendor/ailf-core/index.d.ts";
+import type { LiteracyTaskDefinition, Logger } from "../_vendor/ailf-core/index.d.ts";
 import { type RubricConfig } from "./schemas.js";
+import { type LiteracyEvalSubMode } from "./normalize-mode.js";
 import type { FilterOptions } from "./types.js";
 /** Any assertion entry (templated or value-based). */
 export type AssertEntry = TemplatedAssert | ValueAssert;
@@ -72,14 +81,14 @@ export interface LegacyTaskEntry {
 }
 /** A single task definition in the new format (input). */
 export interface SingleTaskDefinition {
-    /** Grading assertions (applied to gold; optionally abbreviated for baseline). */
+    /** Grading assertions (applied to both gold and baseline by default). */
     assert: AssertEntry[];
     /** Baseline generation options. */
     baseline?: {
         /** Whether to generate a baseline variant. Default: true. */
         enabled?: boolean;
         /** Rubric mode: 'full' copies all asserts, 'abbreviated' generates a
-         *  summary rubric, 'none' omits rubric asserts. Default: 'abbreviated'. */
+         *  summary rubric, 'none' omits rubric asserts. Default: 'full'. */
         rubric?: "abbreviated" | "full" | "none";
     };
     /** Human-readable description of what this task tests. */
@@ -155,19 +164,19 @@ export declare function clearRubricCache(): void;
  *     a single prompt that doesn't use `{{docs}}`; baseline entries would be
  *     pure waste — identical prompts, wasted API calls).
  */
-export declare function expandTask(task: SingleTaskDefinition, rubricConfig: RubricConfig, mode?: "agentic" | "baseline"): ExpandedTestEntry[];
+export declare function expandTask(task: SingleTaskDefinition, rubricConfig: RubricConfig, mode?: LiteracyEvalSubMode): ExpandedTestEntry[];
 /**
- * Expand an array of TaskDefinition[] (from any TaskSource adapter) into
- * Promptfoo-compatible test entries. This is the TaskSource-aware counterpart
- * of loadAndExpandTasks() — it skips YAML file I/O and works directly with
- * the canonical domain type.
+ * Expand an array of LiteracyTaskDefinition[] (from any TaskSource adapter)
+ * into Promptfoo-compatible test entries. This is the TaskSource-aware
+ * counterpart of loadAndExpandTasks() — it skips YAML file I/O and works
+ * directly with the domain type.
  *
- * @param tasks - Task definitions from any TaskSource adapter
+ * @param tasks - Literacy task definitions from any TaskSource adapter
  * @param rootDir - Eval package root (needed to load rubric templates)
  * @param mode - Expansion mode: 'baseline' (gold + baseline) or 'agentic' (gold only)
  * @returns Expanded test entries and statistics
  */
-export declare function expandTaskDefinitions(tasks: TaskDefinition[], rootDir: string, mode?: "agentic" | "baseline"): {
+export declare function expandTaskDefinitions(tasks: LiteracyTaskDefinition[], rootDir: string, mode?: LiteracyEvalSubMode): {
     entries: ExpandedTestEntry[];
     stats: {
         totalTasks: number;
@@ -199,7 +208,7 @@ export declare function isTemplatedAssert(entry: AssertEntry): entry is Template
  *
  * Returns the expanded entries grouped by source file.
  */
-export declare function loadAndExpandTasks(rootDir: string, filter?: FilterOptions, mode?: "agentic" | "baseline", logger?: Logger): {
+export declare function loadAndExpandTasks(rootDir: string, filter?: FilterOptions, mode?: LiteracyEvalSubMode, logger?: Logger): {
     /** All expanded test entries, in order. */
     entries: ExpandedTestEntry[];
     /** Statistics about what was processed. */

package/dist/pipeline/expand-tasks.js CHANGED Viewed

@@ -1,6 +1,15 @@
 /**
  * pipeline/expand-tasks.ts
  *
+ * @deprecated This is part of the LEGACY compilation path. New code should
+ * use the literacy handler in the compiler pipeline instead:
+ *
+ *   import { compileLiteracyTask } from "./compiler/mode-handlers/literacy-handler.js"
+ *
+ * @see packages/eval/src/pipeline/compiler/mode-handlers/literacy-handler.ts
+ *
+ * ---
+ *
  * Reads task YAML files in the single-definition format and expands each
  * task into gold + baseline Promptfoo test entries. This eliminates the
  * manual duplication where every task had to be written twice.
@@ -34,17 +43,19 @@
  *         value: ["client.fetch", "createClient"]
  *     baseline:
  *       enabled: true
- *       rubric: abbreviated
+ *       rubric: full
  *
  * Expands to:
  *   1. Gold entry — uses vars.docs as-is, resolves templates, appends doc-coverage
- *   2. Baseline entry — sets docs: "", adds transform, uses abbreviated rubric
+ *   2. Baseline entry — sets docs: "", uses full rubric (same assertions as gold)
  */
 import { existsSync, readFileSync, readdirSync } from "fs";
 import { resolve } from "path";
 import { load } from "js-yaml";
 import { ConsoleLogger } from "../adapters/loggers/index.js";
+import { loadConfigFile } from "./compiler/config-loader.js";
 import { RubricConfigSchema } from "./schemas.js";
+import { LiteracyVariant } from "./normalize-mode.js";
 // ---------------------------------------------------------------------------
 // Rubric template loading and assembly
 // ---------------------------------------------------------------------------
@@ -142,7 +153,7 @@ export function clearRubricCache() {
  *     a single prompt that doesn't use `{{docs}}`; baseline entries would be
  *     pure waste — identical prompts, wasted API calls).
  */
-export function expandTask(task, rubricConfig, mode = "baseline") {
+export function expandTask(task, rubricConfig, mode = LiteracyVariant.STANDARD) {
     const entries = [];
     // Resolve all templated assertions
     const resolvedAsserts = task.assert.map((a) => resolveAssert(a, rubricConfig));
@@ -168,20 +179,20 @@ export function expandTask(task, rubricConfig, mode = "baseline") {
     entries.push({
         assert: [...resolvedAsserts],
         description: `${task.description} (gold)`,
-        ...(mode === "baseline" ? { prompts: ["with-docs"] } : {}),
+        ...(mode === LiteracyVariant.STANDARD ? { prompts: ["with-docs"] } : {}),
         vars: { ...task.vars, __featureArea: task.featureArea ?? "" },
     });
     // Baseline entry — floor measurement (no docs, parametric knowledge only).
     // Skipped entirely in agentic mode: the agentic prompt doesn't reference
     // {{docs}}, so gold and baseline would produce identical prompts — pure
     // waste of API calls and cost.
-    if (mode === "agentic") {
+    if (mode === LiteracyVariant.AGENTIC) {
         return entries;
     }
     // Restricted to the 'without-docs' prompt. Unless explicitly disabled.
     const baselineEnabled = task.baseline?.enabled !== false;
     if (baselineEnabled) {
-        const rubricMode = task.baseline?.rubric ?? "abbreviated";
+        const rubricMode = task.baseline?.rubric ?? "full";
         const baselineAsserts = buildBaselineAsserts(resolvedAsserts, rubricMode);
         entries.push({
             description: `${task.description} (baseline)`,
@@ -197,8 +208,8 @@ export function expandTask(task, rubricConfig, mode = "baseline") {
     return entries;
 }
 /**
- * Convert a TaskDefinition (from @sanity/ailf-core) to the local
- * SingleTaskDefinition format used by expandTask().
+ * Convert a LiteracyTaskDefinition to the local SingleTaskDefinition
+ * format used by expandTask().
  *
  * When a task has no canonical docs, the `docs` var is set to empty string
  * instead of a file path. This prevents Promptfoo from trying to read a
@@ -206,38 +217,38 @@ export function expandTask(task, rubricConfig, mode = "baseline") {
  * without canonical docs). The gold entry still runs — it just tests model
  * knowledge alone, same as the baseline variant.
  */
-function taskDefinitionToSingle(task) {
-    const hasDocs = task.canonicalDocs.length > 0;
+function literacyTaskToSingle(task) {
+    const hasDocs = (task.context?.docs?.length ?? 0) > 0;
     return {
-        assert: task.assertions.map((a) => ({ ...a })),
+        assert: (task.assertions ?? []).map((a) => ({ ...a })),
         baseline: task.baseline,
-        description: task.description,
+        description: task.title,
         doc_coverage: task.docCoverage,
-        featureArea: task.featureArea,
+        featureArea: task.area ?? "",
         id: task.id,
         vars: {
             docs: hasDocs ? `file://contexts/canonical/${task.id}.md` : "",
-            task: task.taskPrompt,
-            ...task.extraVars,
+            task: task.prompt?.text ?? "",
+            ...task.prompt?.vars,
         },
     };
 }
 /**
- * Expand an array of TaskDefinition[] (from any TaskSource adapter) into
- * Promptfoo-compatible test entries. This is the TaskSource-aware counterpart
- * of loadAndExpandTasks() — it skips YAML file I/O and works directly with
- * the canonical domain type.
+ * Expand an array of LiteracyTaskDefinition[] (from any TaskSource adapter)
+ * into Promptfoo-compatible test entries. This is the TaskSource-aware
+ * counterpart of loadAndExpandTasks() — it skips YAML file I/O and works
+ * directly with the domain type.
  *
- * @param tasks - Task definitions from any TaskSource adapter
+ * @param tasks - Literacy task definitions from any TaskSource adapter
  * @param rootDir - Eval package root (needed to load rubric templates)
  * @param mode - Expansion mode: 'baseline' (gold + baseline) or 'agentic' (gold only)
  * @returns Expanded test entries and statistics
  */
-export function expandTaskDefinitions(tasks, rootDir, mode = "baseline") {
+export function expandTaskDefinitions(tasks, rootDir, mode = LiteracyVariant.STANDARD) {
     const rubricConfig = loadRubricTemplates(rootDir);
     const entries = [];
     for (const task of tasks) {
-        const single = taskDefinitionToSingle(task);
+        const single = literacyTaskToSingle(task);
         entries.push(...expandTask(single, rubricConfig, mode));
     }
     return {
@@ -314,7 +325,7 @@ export function isTemplatedAssert(entry) {
  *
  * Returns the expanded entries grouped by source file.
  */
-export function loadAndExpandTasks(rootDir, filter, mode = "baseline", logger) {
+export function loadAndExpandTasks(rootDir, filter, mode = LiteracyVariant.STANDARD, logger) {
     const log = logger ?? new ConsoleLogger();
     const tasksDir = resolve(rootDir, "tasks");
     if (!existsSync(tasksDir)) {
@@ -420,18 +431,13 @@ export function loadAndExpandTasks(rootDir, filter, mode = "baseline", logger) {
 export function loadRubricTemplates(rootDir) {
     if (cachedRubricConfig)
         return cachedRubricConfig;
-    const filePath = resolve(rootDir, "config", "rubrics.yaml");
-    if (!existsSync(filePath)) {
-        throw new Error(`config/rubrics.yaml not found at ${filePath}`);
-    }
-    const raw = readFileSync(filePath, "utf-8");
-    const parsed = load(raw);
-    const result = RubricConfigSchema.safeParse(parsed);
+    const { data } = loadConfigFile("rubrics", rootDir);
+    const result = RubricConfigSchema.safeParse(data);
     if (!result.success) {
         const messages = result.error.issues
             .map((i) => `  [${i.path.join(".")}]: ${i.message}`)
             .join("\n");
-        throw new Error(`Invalid config/rubrics.yaml:\n${messages}`);
+        throw new Error(`Invalid config/rubrics:\n${messages}`);
     }
     cachedRubricConfig = result.data;
     return result.data;

package/dist/pipeline/gap-analysis.d.ts CHANGED Viewed

@@ -34,7 +34,7 @@ export declare function buildGapAnalysisReport(failureModeReport: FailureModeRep
  *
  * @param failureModeReport - Classified failure modes from Phase 3a
  * @param scores - Per-area feature scores
- * @param weights - Dimension weights (defaults to rubrics.yaml weights)
+ * @param weights - Dimension weights (defaults to rubrics config weights)
  * @returns Gap estimates sorted by priority (highest first)
  */
 export declare function estimateImpact(failureModeReport: FailureModeReport, scores: FeatureScore[], weights?: Record<string, number>): GapEstimate[];

package/dist/pipeline/gap-analysis.js CHANGED Viewed

@@ -18,7 +18,7 @@
 // ---------------------------------------------------------------------------
 // Constants
 // ---------------------------------------------------------------------------
-/** Default dimension weights (must match rubrics.yaml) */
+/** Default dimension weights (must match rubrics config) */
 const DEFAULT_WEIGHTS = {
     "code-correctness": 0.25,
     "doc-coverage": 0.25,
@@ -71,7 +71,7 @@ export function buildGapAnalysisReport(failureModeReport, scores, weights) {
  *
  * @param failureModeReport - Classified failure modes from Phase 3a
  * @param scores - Per-area feature scores
- * @param weights - Dimension weights (defaults to rubrics.yaml weights)
+ * @param weights - Dimension weights (defaults to rubrics config weights)
  * @returns Gap estimates sorted by priority (highest first)
  */
 export function estimateImpact(failureModeReport, scores, weights = DEFAULT_WEIGHTS) {

package/dist/pipeline/generate-configs.d.ts CHANGED Viewed

@@ -1,6 +1,21 @@
 /**
  * pipeline/generate-configs.ts
  *
+ * @deprecated This is the LEGACY compilation path. New code should use the
+ * config compiler pipeline instead:
+ *
+ *   import { compileLiteracyTasks } from "./compiler/literacy-bridge.js"
+ *   import { buildTaskGraph, compileToPromptfoo } from "./compiler/index.js"
+ *
+ * This file is retained behind the `--legacy-compiler` CLI flag as an
+ * emergency fallback during the migration period. It will be removed once
+ * the new compiler has been validated in production.
+ *
+ * @see packages/eval/src/pipeline/compiler/ — the new compiler pipeline
+ * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
+ *
+ * ---
+ *
  * Reads config/models.yaml (the central model registry) and generates all
  * promptfoo config files with the correct provider entries.
  *
@@ -18,7 +33,7 @@
  * @see config/models.yaml — the central model registry
  * @see docs/exec-plans/eliminate-lib-layer.md
  */
-import { type Logger, type TaskDefinition } from "../_vendor/ailf-core/index.d.ts";
+import { type LiteracyTaskDefinition, type Logger } from "../_vendor/ailf-core/index.d.ts";
 import type { FilterOptions } from "./types.js";
 import { type ResolvedSourceConfig } from "../sources.js";
 export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, } from "../_vendor/ailf-core/index.d.ts";
@@ -41,7 +56,10 @@ interface LoadedPrompts {
         raw: string;
     };
 }
-/** Load prompt templates from config/prompts.yaml. Throws if missing or malformed. */
+/**
+ * Load prompt templates. Uses handler-owned literacy templates as defaults,
+ * with config/prompts.ts as an override layer for user customization.
+ */
 export declare function loadPrompts(rootDir: string): LoadedPrompts;
 /** Options for the generateConfigs function. */
 export interface GenerateConfigsOptions {
@@ -59,10 +77,10 @@ export interface GenerateConfigsOptions {
     searchMode?: string;
     /** Documentation source name (e.g., "branch", "local") */
     source?: string;
-    /** Pre-loaded task definitions from a TaskSource adapter.
+    /** Pre-loaded literacy task definitions from a TaskSource adapter.
      *  When provided, expandTaskDefinitions() is used instead of
      *  loadAndExpandTasks() (which reads from tasks/*.yaml files). */
-    tasks?: TaskDefinition[];
+    tasks?: LiteracyTaskDefinition[];
 }
 /**
  * Generate Promptfoo config files from models.yaml + task definitions.