npm - @sanity/ailf - Versions diffs - 0.4.1 → 1.0.0 - Mend

@sanity/ailf 0.4.1 → 1.0.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (290)

package/config/features.ts +23 -0
package/config/models.ts +83 -0
package/config/prompts.ts +16 -0
package/config/rubrics.ts +225 -0
package/config/schedules.ts +47 -0
package/config/sinks.ts +37 -0
package/config/sources.ts +21 -0
package/config/thresholds.ts +61 -0
package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
package/dist/_vendor/ailf-core/config-helpers.js +150 -0
package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
package/dist/_vendor/ailf-core/env-helper.js +45 -0
package/dist/_vendor/ailf-core/examples/index.d.ts +10 -10
package/dist/_vendor/ailf-core/examples/index.js +10 -10
package/dist/_vendor/ailf-core/index.d.ts +3 -0
package/dist/_vendor/ailf-core/index.js +5 -0
package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +32 -31
package/dist/_vendor/ailf-core/schemas/pipeline.js +52 -12
package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
package/dist/_vendor/ailf-core/services/index.js +2 -1
package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
package/dist/_vendor/ailf-core/services/scoring.js +25 -15
package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
package/dist/_vendor/ailf-core/types/index.js +8 -1
package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
package/dist/_vendor/ailf-core/types/trace.js +18 -0
package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
package/dist/_vendor/ailf-shared/index.d.ts +0 -1
package/dist/_vendor/ailf-shared/index.js +0 -1
package/dist/adapters/api-client/build-request.js +14 -13
package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
package/dist/adapters/config-sources/file-config-adapter.js +38 -12
package/dist/adapters/config-sources/index.d.ts +2 -0
package/dist/adapters/config-sources/index.js +1 -0
package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
package/dist/adapters/config-sources/ts-config-loader.js +133 -0
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
package/dist/adapters/task-sources/composite-task-source.js +1 -1
package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
package/dist/adapters/task-sources/index.d.ts +1 -0
package/dist/adapters/task-sources/index.js +1 -0
package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
package/dist/adapters/task-sources/repo-task-source.js +69 -16
package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
package/dist/adapters/task-sources/task-file-loader.js +83 -0
package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
package/dist/adapters/task-sources/yaml-task-source.js +19 -16
package/dist/cli.js +0 -2
package/dist/commands/baseline.js +4 -1
package/dist/commands/calculate-scores.js +1 -1
package/dist/commands/coverage-audit.js +7 -1
package/dist/commands/explain-handler.js +25 -23
package/dist/commands/fetch-docs.js +3 -2
package/dist/commands/generate-configs.js +1 -1
package/dist/commands/interactive.js +11 -7
package/dist/commands/pipeline-action.d.ts +2 -0
package/dist/commands/pipeline-action.js +16 -6
package/dist/commands/pipeline.d.ts +1 -0
package/dist/commands/pipeline.js +4 -2
package/dist/commands/pr-comment.js +1 -1
package/dist/commands/publish.js +2 -2
package/dist/commands/readiness-report.js +13 -6
package/dist/composition-root.d.ts +1 -1
package/dist/composition-root.js +67 -4
package/dist/orchestration/build-app-context.js +1 -0
package/dist/orchestration/build-step-sequence.js +24 -6
package/dist/orchestration/steps/calculate-scores-step.js +24 -11
package/dist/orchestration/steps/fetch-docs-step.js +6 -4
package/dist/orchestration/steps/gap-analysis-step.js +8 -7
package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
package/dist/orchestration/steps/generate-configs-step.js +245 -51
package/dist/orchestration/steps/grader-consistency-step.js +7 -4
package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
package/dist/orchestration/steps/readiness-step.js +5 -6
package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
package/dist/orchestration/steps/run-eval-step.js +8 -7
package/dist/pipeline/cache.d.ts +1 -1
package/dist/pipeline/cache.js +36 -8
package/dist/pipeline/calculate-scores.d.ts +5 -7
package/dist/pipeline/calculate-scores.js +74 -153
package/dist/pipeline/checks.js +2 -2
package/dist/pipeline/compare.js +8 -8
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
package/dist/pipeline/compiler/assertion-mapper.js +175 -0
package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
package/dist/pipeline/compiler/config-loader.d.ts +56 -0
package/dist/pipeline/compiler/config-loader.js +111 -0
package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
package/dist/pipeline/compiler/fixture-resolver.js +113 -0
package/dist/pipeline/compiler/hash.d.ts +11 -0
package/dist/pipeline/compiler/hash.js +18 -0
package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
package/dist/pipeline/compiler/ignore-fields.js +113 -0
package/dist/pipeline/compiler/index.d.ts +29 -0
package/dist/pipeline/compiler/index.js +45 -0
package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
package/dist/pipeline/compiler/literacy-bridge.js +172 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
package/dist/pipeline/compiler/presets/index.d.ts +9 -0
package/dist/pipeline/compiler/presets/index.js +8 -0
package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
package/dist/pipeline/compiler/provider-assembler.js +137 -0
package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
package/dist/pipeline/compiler/sandbox/index.js +11 -0
package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
package/dist/pipeline/compiler/scoring-bridge.js +114 -0
package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
package/dist/pipeline/compiler/task-graph-builder.js +291 -0
package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
package/dist/pipeline/compiler/telemetry/index.js +19 -0
package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
package/dist/pipeline/compiler/variable-resolver.js +115 -0
package/dist/pipeline/coverage-audit.d.ts +15 -5
package/dist/pipeline/coverage-audit.js +41 -22
package/dist/pipeline/eval-constants.d.ts +16 -6
package/dist/pipeline/eval-constants.js +25 -4
package/dist/pipeline/eval-fingerprint.d.ts +2 -2
package/dist/pipeline/eval-fingerprint.js +8 -9
package/dist/pipeline/expand-tasks.d.ts +23 -14
package/dist/pipeline/expand-tasks.js +37 -31
package/dist/pipeline/gap-analysis.d.ts +1 -1
package/dist/pipeline/gap-analysis.js +2 -2
package/dist/pipeline/generate-configs.d.ts +22 -4
package/dist/pipeline/generate-configs.js +53 -24
package/dist/pipeline/grader-api.d.ts +3 -3
package/dist/pipeline/grader-api.js +5 -12
package/dist/pipeline/grader-compare-runner.js +20 -27
package/dist/pipeline/grader-comparison.d.ts +4 -8
package/dist/pipeline/grader-comparison.js +11 -17
package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
package/dist/pipeline/grader-consistency-runner.js +18 -21
package/dist/pipeline/grader-consistency.d.ts +6 -10
package/dist/pipeline/grader-consistency.js +13 -32
package/dist/pipeline/grader-sensitivity-runner.js +7 -5
package/dist/pipeline/grader-sensitivity.d.ts +2 -6
package/dist/pipeline/grader-sensitivity.js +10 -10
package/dist/pipeline/grader-validate-runner.js +7 -5
package/dist/pipeline/grader-validation.d.ts +2 -6
package/dist/pipeline/grader-validation.js +14 -22
package/dist/pipeline/map-request-to-config.js +6 -1
package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
package/dist/pipeline/mirror-repo-tasks.js +16 -15
package/dist/pipeline/normalize-mode.d.ts +49 -0
package/dist/pipeline/normalize-mode.js +64 -0
package/dist/pipeline/plan.d.ts +5 -2
package/dist/pipeline/plan.js +134 -78
package/dist/pipeline/pr-comment.js +2 -0
package/dist/pipeline/profile-resolution.d.ts +47 -0
package/dist/pipeline/profile-resolution.js +91 -0
package/dist/pipeline/provenance.d.ts +2 -2
package/dist/pipeline/provenance.js +12 -17
package/dist/pipeline/release-report.js +4 -4
package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
package/dist/pipeline/repo-threshold-evaluator.js +1 -1
package/dist/pipeline/rubric-loader.d.ts +20 -0
package/dist/pipeline/rubric-loader.js +37 -0
package/dist/pipeline/validate.d.ts +4 -4
package/dist/pipeline/validate.js +64 -53
package/dist/schedules/loader.js +18 -8
package/dist/scripts/migrate-task-mode.d.ts +24 -0
package/dist/scripts/migrate-task-mode.js +85 -0
package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
package/dist/scripts/validate-task-sources.d.ts +1 -1
package/dist/scripts/validate-task-sources.js +15 -15
package/dist/sinks/loader.js +5 -7
package/dist/sources.d.ts +7 -7
package/dist/sources.js +22 -24
package/dist/webhook/dispatch.js +2 -1
package/package.json +6 -3
package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
package/tasks/literacy/frameworks.task.ts +128 -0
package/tasks/literacy/functions.task.ts +69 -0
package/tasks/literacy/groq.task.ts +258 -0
package/tasks/literacy/nextjs-live.task.ts +75 -0
package/tasks/literacy/studio-setup.task.ts +131 -0
package/tasks/literacy/visual-editing.task.ts +146 -0
package/config/features.yaml +0 -116
package/config/models.yaml +0 -116
package/config/prompts.yaml +0 -75
package/config/rubrics.yaml +0 -62
package/config/schedules.yaml +0 -43
package/config/sinks.yaml +0 -54
package/config/sources.yaml +0 -51
package/config/thresholds.yaml +0 -49
package/dist/agent-observer/test-imports.d.ts +0 -7
package/dist/agent-observer/test-imports.js +0 -185

package/dist/commands/explain-handler.js CHANGED Viewed

@@ -23,6 +23,7 @@ import { TASK_FILE_NAMES } from "../_vendor/ailf-core/index.js";
 import { buildPipelinePlan, buildSimpleCommandPlan, } from "../pipeline/plan.js";
 import { formatPlanConsole, formatPlanJson } from "../pipeline/plan-format.js";
 import { computeResolvedOptions } from "./pipeline-action.js";
+import { LiteracyVariant } from "../pipeline/normalize-mode.js";
 // ---------------------------------------------------------------------------
 // Registry
 // ---------------------------------------------------------------------------
@@ -84,8 +85,8 @@ const EXPLAIN_REGISTRY = {
         filesCreated: ["results/latest/score-summary.json"],
         filesRead: [
             "results/latest/eval-results.json",
-            "config/rubrics.yaml",
-            "config/models.yaml",
+            "config/rubrics.ts",
+            "config/models.ts",
         ],
         steps: [
             {
@@ -138,12 +139,12 @@ const EXPLAIN_REGISTRY = {
     },
     "coverage-audit": {
         description: "Cross-reference feature registry against evaluation tasks for coverage gaps",
-        filesRead: ["config/features.yaml", "tasks/*.yaml"],
+        filesRead: ["config/features.ts", "tasks/*.{yaml,task.ts,task.js}"],
         steps: [
             {
                 cacheStatus: "miss",
                 name: "Load feature registry",
-                reason: "Parse config/features.yaml for product feature list",
+                reason: "Parse config/features.ts for product feature list",
                 willRun: true,
             },
             {
@@ -201,7 +202,7 @@ const EXPLAIN_REGISTRY = {
     "fetch-docs": {
         description: "Fetch documentation from Sanity CMS and generate canonical context files",
         filesCreated: ["contexts/canonical/*.md"],
-        filesRead: ["config/sources.yaml", "config/models.yaml"],
+        filesRead: ["config/sources.ts", "config/models.ts"],
         steps: [
             {
                 cacheStatus: "miss",
@@ -224,7 +225,7 @@ const EXPLAIN_REGISTRY = {
         ],
     },
     "generate-configs": {
-        description: "Generate Promptfoo config files from models.yaml and task definitions",
+        description: "Generate Promptfoo config files from models.ts and task definitions",
         filesCreated: [
             "promptfooconfig.yaml",
             "promptfooconfig.observed.yaml",
@@ -232,16 +233,16 @@ const EXPLAIN_REGISTRY = {
             "tasks/.expanded.yaml",
         ],
         filesRead: [
-            "config/models.yaml",
-            "config/prompts.yaml",
-            "config/rubrics.yaml",
-            "config/sources.yaml",
+            "config/models.ts",
+            "config/prompts.ts",
+            "config/rubrics.ts",
+            "config/sources.ts",
         ],
         steps: [
             {
                 cacheStatus: "miss",
                 name: "Load models",
-                reason: "Parse config/models.yaml for active model list",
+                reason: "Parse config/models.ts for active model list",
                 willRun: true,
             },
             {
@@ -262,7 +263,7 @@ const EXPLAIN_REGISTRY = {
         description: "Grader reliability tools (consistency, compare, sensitivity, validate)",
         filesRead: [
             "results/latest/eval-results.json",
-            "config/rubrics.yaml",
+            "config/rubrics.ts",
             "canonical/reference-solutions/",
         ],
         steps: [
@@ -369,7 +370,7 @@ const EXPLAIN_REGISTRY = {
         filesRead: [
             "results/latest/score-summary.json",
             "results/latest/gap-analysis.json",
-            "config/thresholds.yaml",
+            "config/thresholds.ts",
             "results/baselines/",
         ],
         filesCreated: ["results/latest/readiness-report.md"],
@@ -377,7 +378,7 @@ const EXPLAIN_REGISTRY = {
             {
                 cacheStatus: "miss",
                 name: "Load scores + thresholds",
-                reason: "Read score-summary.json and thresholds.yaml for gate evaluation",
+                reason: "Read score-summary.json and thresholds.ts for gate evaluation",
                 willRun: true,
             },
             {
@@ -395,18 +396,18 @@ const EXPLAIN_REGISTRY = {
         ],
     },
     validate: {
-        description: "Validate all YAML config files, task definitions, reference solutions, and environment",
+        description: "Validate all config files, task definitions, reference solutions, and environment",
         filesRead: [
-            "config/models.yaml",
-            "config/rubrics.yaml",
-            "config/features.yaml",
-            "config/thresholds.yaml",
+            "config/models.ts",
+            "config/rubrics.ts",
+            "config/features.ts",
+            "config/thresholds.ts",
         ],
         steps: [
             {
                 cacheStatus: "miss",
                 name: "Validate configuration",
-                reason: "Parse all YAML configs through Zod schemas, cross-reference mappings",
+                reason: "Parse all config files through Zod schemas, cross-reference mappings",
                 willRun: true,
             },
             {
@@ -454,12 +455,12 @@ const EXPLAIN_REGISTRY = {
     },
     "weekly-digest": {
         description: "Generate and deliver a weekly evaluation trend digest via Slack",
-        filesRead: ["config/schedules.yaml", "config/sinks.yaml"],
+        filesRead: ["config/schedules.ts", "config/sinks.ts"],
         steps: [
             {
                 cacheStatus: "miss",
                 name: "Load digest config",
-                reason: "Read schedules.yaml for lookback window and delivery targets",
+                reason: "Read schedules.ts for lookback window and delivery targets",
                 willRun: true,
             },
             {
@@ -670,7 +671,7 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
         graderReplications: raw.graderReplications,
         header: raw.header ?? [],
         headers: raw.headers ?? [],
-        mode: raw.mode ?? "full",
+        mode: raw.mode ?? LiteracyVariant.FULL,
         output: raw.output,
         promptfooUrl: raw.promptfooUrl,
         publish: raw.publish,
@@ -714,6 +715,7 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
         gapAnalysisEnabled: resolved.gapAnalysisEnabled,
         graderReplications: resolved.graderReplications,
         mode: resolved.mode,
+        variant: resolved.variant,
         noCache: resolved.noCache,
         publishEnabled: resolved.publishEnabled,
         readinessEnabled: resolved.readinessEnabled,

package/dist/commands/fetch-docs.js CHANGED Viewed

@@ -41,7 +41,7 @@ async function executeFetchDocs(opts) {
     // Build a minimal ResolvedConfig for the composition root
     const ctx = createAppContext({
         rootDir: ROOT,
-        mode: "baseline",
+        mode: "literacy",
         noAutoScope: false,
         skipFetch: false,
         skipEval: true,
@@ -83,7 +83,8 @@ async function executeFetchDocs(opts) {
     }
     // Canonical contexts — same code path as the pipeline
     const tasks = await ctx.taskSource.loadTasks();
-    const tasksWithDocs = tasks.filter((t) => t.canonicalDocs.length > 0);
+    // Bridge: narrow to literacy tasks with docs (only literacy tasks have context.docs)
+    const tasksWithDocs = tasks.filter((t) => t.mode === "literacy" && (t.context?.docs?.length ?? 0) > 0);
     if (tasksWithDocs.length > 0) {
         console.log("\nGenerating canonical (gold-retrieval) contexts...\n");
         const result = await fetcher.fetch(tasksWithDocs, resolvedSource);

package/dist/commands/generate-configs.js CHANGED Viewed

@@ -19,7 +19,7 @@ export function createGenerateConfigsCommand() {
         try {
             const ctx = createAppContext({
                 rootDir: ROOT,
-                mode: "baseline",
+                mode: "literacy",
                 noAutoScope: false,
                 skipFetch: true,
                 skipEval: true,

package/dist/commands/interactive.js CHANGED Viewed

@@ -9,6 +9,10 @@
  * Uses @inquirer/prompts for a clean, modern terminal UI.
  */
 import { Command } from "commander";
+import { LiteracyVariant } from "../pipeline/normalize-mode.js";
+// CLI command name for the baseline snapshot management subcommand.
+// Defined as a constant to avoid scattering the literal string across routing code.
+const BASELINE_CMD = "baseline";
 export function createInteractiveCommand() {
     return new Command("interactive")
         .description("Guided wizard for common evaluation workflows")
@@ -65,7 +69,7 @@ async function runInteractiveWizard() {
             {
                 description: "Save, compare, or list historical score snapshots",
                 name: "Manage baselines",
-                value: "baseline",
+                value: BASELINE_CMD,
             },
             {
                 description: "Weekly evaluation trends and area summaries",
@@ -93,7 +97,7 @@ async function runInteractiveWizard() {
         });
         return { args: dryRun ? ["--dry-run"] : [], command: "weekly-digest" };
     }
-    if (workflow === "baseline") {
+    if (workflow === BASELINE_CMD) {
         const subcommand = await select({
             choices: [
                 { name: "Save current scores", value: "save" },
@@ -102,7 +106,7 @@ async function runInteractiveWizard() {
             ],
             message: "Baseline operation:",
         });
-        return { args: [subcommand], command: "baseline" };
+        return { args: [subcommand], command: BASELINE_CMD };
     }
     if (workflow === "grader") {
         const subcommand = await select({
@@ -140,22 +144,22 @@ async function runInteractiveWizard() {
             {
                 description: "Evaluate with pre-fetched documentation context",
                 name: "Baseline (with docs vs without docs)",
-                value: "baseline",
+                value: LiteracyVariant.STANDARD,
             },
             {
                 description: "Baseline + record HTTP request patterns",
                 name: "Observed (instrumented)",
-                value: "observed",
+                value: LiteracyVariant.OBSERVED,
             },
             {
                 description: "Agent searches for docs itself via web tools",
                 name: "Agentic (agent-driven retrieval)",
-                value: "agentic",
+                value: LiteracyVariant.AGENTIC,
             },
         ],
         message: "Evaluation mode:",
     });
-    if (mode !== "baseline") {
+    if (mode !== LiteracyVariant.STANDARD) {
         args.push("--mode", mode);
     }
     // Step 3: Area scoping

package/dist/commands/pipeline-action.d.ts CHANGED Viewed

@@ -31,6 +31,8 @@ export interface ResolvedOptions {
     headerArgs: string[];
     impactSummary?: ImpactSummary;
     mode: EvalMode;
+    /** Literacy variant — set when the user passes a legacy mode name */
+    variant?: string;
     noAutoScope: boolean;
     noCache: boolean;
     noRemoteCache: boolean;

package/dist/commands/pipeline-action.js CHANGED Viewed

@@ -14,6 +14,7 @@ import { existsSync, readFileSync, writeFileSync } from "fs";
 import { dirname, resolve } from "path";
 import { fileURLToPath } from "url";
 import { classifyUrls } from "../pipeline/classify-url.js";
+import { normalizeMode } from "../pipeline/normalize-mode.js";
 import { assessImpact, buildReverseMapping, } from "../pipeline/reverse-mapping.js";
 import { buildAppContext } from "../orchestration/build-app-context.js";
 import { buildStepSequence } from "../orchestration/build-step-sequence.js";
@@ -23,9 +24,8 @@ import { parseRepoConfig, } from "../adapters/task-sources/repo-schemas.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
 // ---------------------------------------------------------------------------
-// Valid modes & search modes
+// Valid search modes
 // ---------------------------------------------------------------------------
-const VALID_MODES = ["baseline", "observed", "agentic", "full"];
 const VALID_SEARCH_MODES = ["open", "origin-only", "off"];
 /**
  * Pure option resolution — computes ResolvedOptions from CLI flags without
@@ -36,10 +36,19 @@ const VALID_SEARCH_MODES = ["open", "origin-only", "off"];
 export function computeResolvedOptions(opts) {
     // Resolve paths relative to the caller's cwd, not the eval package root
     const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
-    // Validate mode
-    const mode = opts.mode;
-    if (!VALID_MODES.includes(mode)) {
-        console.error(`❌ Invalid mode "${opts.mode}". Must be one of: ${VALID_MODES.join(", ")}`);
+    // Validate + normalize mode via the single boundary function.
+    // normalizeMode() maps legacy variant names (baseline, agentic, etc.)
+    // to canonical mode "literacy" + variant, and throws on invalid input.
+    let mode;
+    let variant;
+    try {
+        const normalized = normalizeMode(opts.mode);
+        mode = normalized.mode;
+        // Explicit --variant flag takes precedence over what normalizeMode inferred
+        variant = opts.variant ?? normalized.variant;
+    }
+    catch (err) {
+        console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);
         process.exit(1);
     }
     // Debug options — any sub-flag (--debug-n, --debug-pattern, --debug-sample)
@@ -220,6 +229,7 @@ export function computeResolvedOptions(opts) {
         headerArgs,
         impactSummary,
         mode,
+        variant,
         noAutoScope: opts.autoScope === false,
         noCache: !opts.cache,
         noRemoteCache: opts.remoteCache === false,

package/dist/commands/pipeline.d.ts CHANGED Viewed

@@ -35,6 +35,7 @@ export interface PipelineCliOptions {
     header: string[];
     headers: string[];
     mode: string;
+    variant?: string;
     output?: string;
     promptfooUrl?: string;
     publish?: boolean;

package/dist/commands/pipeline.js CHANGED Viewed

@@ -8,11 +8,13 @@
  * @see docs/CLI.md for the full flag reference.
  */
 import { Command } from "commander";
+import { LiteracyVariant } from "../pipeline/normalize-mode.js";
 import { addAgenticOptions, addDebugOptions, addSanitySourceOptions, } from "./shared/options.js";
 export function createPipelineCommand() {
     const cmd = new Command("pipeline")
         .description("Run the full evaluation pipeline")
-        .option("-m, --mode <mode>", "Evaluation mode: full (default — floor + ceiling + actual), baseline (floor + ceiling only), agentic (actual only), observed", "full")
+        .option("-m, --mode <mode>", "Evaluation mode: literacy (default), mcp-server, agent-harness, knowledge-probe, custom. Legacy aliases (baseline, agentic, observed, full) are accepted and normalized to literacy + variant.", LiteracyVariant.FULL)
+        .option("--variant <variant>", "Literacy variant: full (default — standard + agentic), baseline (standard only), agentic (agentic only), observed. Only applies to --mode literacy.")
         .option("-s, --source <name>", "Documentation source name (from sources.yaml)")
         .option("-n, --dry-run", "Validate configuration only, no execution", false)
         .option("--skip-fetch", "Reuse cached documentation contexts", false)
@@ -44,7 +46,7 @@ export function createPipelineCommand() {
         .option("--publish-tag <tag>", "Label for published report")
         .option("--report-dataset <name>", "Sanity dataset for report store")
         .option("--report-project <id>", "Sanity project ID for report store")
-        .option("--config <path>", "Load pipeline config from a JSON/YAML file (overrides most CLI flags)")
+        .option("--config <path>", "Load pipeline config from a TS/JS/YAML/JSON file (overrides most CLI flags)")
         .option("-o, --output <path>", "Write PR comment markdown to file")
         .option("--promptfoo-url <url>", "Promptfoo share URL for report")
         .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), repo (repo tasks only, no Content Lake merge), yaml (tasks/*.yaml files, legacy)", "content-lake")

package/dist/commands/pr-comment.js CHANGED Viewed

@@ -20,7 +20,7 @@ export function createPrCommentCommand() {
         try {
             const ctx = createAppContext({
                 rootDir: ROOT,
-                mode: "baseline",
+                mode: "literacy",
                 noAutoScope: false,
                 skipFetch: true,
                 skipEval: true,

package/dist/commands/publish.js CHANGED Viewed

@@ -52,7 +52,7 @@ export function createPublishCommand() {
  */
 function buildProvenanceFromSummary(summary) {
     const areas = summary.scores.map((s) => s.feature);
-    const mode = (process.env.EVAL_MODE ?? "baseline");
+    const mode = (process.env.EVAL_MODE ?? "literacy");
     const source = {
         baseUrl: summary.source?.baseUrl ?? "https://www.sanity.io/docs",
         dataset: summary.source?.dataset ?? process.env.SANITY_DATASET ?? "next",
@@ -83,7 +83,7 @@ async function runPublishCommand(summaryPath, opts) {
         compareEnabled: false,
         discoveryReportEnabled: false,
         gapAnalysisEnabled: false,
-        mode: "baseline",
+        mode: "literacy",
         noAutoScope: false,
         noCache: true,
         noRemoteCache: true,

package/dist/commands/readiness-report.js CHANGED Viewed

@@ -10,14 +10,14 @@ import { Command } from "commander";
 import { existsSync, readFileSync, readdirSync, writeFileSync } from "fs";
 import { dirname, join, resolve } from "path";
 import { fileURLToPath } from "url";
-import { load } from "js-yaml";
+import { ConfigNotFoundError, loadConfigFile, } from "../pipeline/compiler/config-loader.js";
 import { formatReadinessMarkdown, generateReadinessReport, } from "../pipeline/readiness-report.js";
 import { ThresholdConfigSchema, } from "../pipeline/schemas.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = resolve(__dirname, "..", "..");
 const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
 const GAP_ANALYSIS_PATH = join(ROOT, "results", "latest", "gap-analysis.json");
-const THRESHOLDS_PATH = join(ROOT, "config", "thresholds.yaml");
+// Thresholds are loaded with loadConfigFile("thresholds", ROOT) in createReadinessReportCommand() below.
 const BASELINES_DIR = join(ROOT, "results", "baselines");
 export function createReadinessReportCommand() {
     return new Command("readiness-report")
@@ -33,12 +33,19 @@ export function createReadinessReportCommand() {
         }
         const scoreSummary = JSON.parse(readFileSync(SCORE_SUMMARY_PATH, "utf-8"));
         // Load threshold config
-        if (!existsSync(THRESHOLDS_PATH)) {
-            console.error(`❌ Threshold config not found at ${THRESHOLDS_PATH}.`);
+        let parsedThresholds;
+        try {
+            parsedThresholds = loadConfigFile("thresholds", ROOT).data;
+        }
+        catch (err) {
+            if (err instanceof ConfigNotFoundError) {
+                console.error("❌ Threshold config not found in config/.");
+            }
+            else {
+                console.error(`❌ Failed to load threshold config: ${err instanceof Error ? err.message : err}`);
+            }
             process.exit(1);
         }
-        const rawThresholds = readFileSync(THRESHOLDS_PATH, "utf-8");
-        const parsedThresholds = load(rawThresholds);
         const thresholdResult = ThresholdConfigSchema.safeParse(parsedThresholds);
         if (!thresholdResult.success) {
             const messages = thresholdResult.error.issues

package/dist/composition-root.d.ts CHANGED Viewed

@@ -15,7 +15,7 @@
  * @see packages/core/src/ports/context.ts — AppContext interface
  * @see docs/exec-plans/ports-and-adapters/phase-7-composition-root.md
  */
-import type { AppContext, ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
+import { type AppContext, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
 /**
  * Create a fully wired AppContext from resolved configuration.
  *

package/dist/composition-root.js CHANGED Viewed

@@ -15,12 +15,13 @@
  * @see packages/core/src/ports/context.ts — AppContext interface
  * @see docs/exec-plans/ports-and-adapters/phase-7-composition-root.md
  */
+import { InMemoryPluginRegistry, } from "./_vendor/ailf-core/index.js";
 import { ContentLakeCacheAdapter } from "./adapters/cache/content-lake-cache.js";
 import { FilesystemCache } from "./adapters/cache/filesystem-cache.js";
-import { SanityDocFetcher } from "./adapters/doc-fetchers/index.js";
 import { PromptfooEvalAdapter } from "./adapters/eval-runners/promptfoo-eval-adapter.js";
 import { ConsoleLogger, JsonLogger, QuietLogger, } from "./adapters/loggers/index.js";
 import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource, YamlTaskSource, } from "./adapters/task-sources/index.js";
+import { createSanityLiteracyPreset } from "./pipeline/compiler/presets/index.js";
 import { getSanityClient } from "./sanity/client.js";
 import { ReportStore } from "./report-store.js";
 import { loadSinks } from "./sinks/index.js";
@@ -38,13 +39,18 @@ export function createAppContext(config) {
     const cache = config.noCache ? undefined : createCache(config);
     // Task source — selected by config.taskSourceType
     const taskSource = createTaskSource(config);
-    // Doc fetcher — Sanity Content Lake
-    const docFetcher = new SanityDocFetcher(config.rootDir);
+    // Plugin registry — mode handlers, assertions, rubric templates, doc fetcher.
+    // The Sanity preset is registered here with config.rootDir so its doc fetcher
+    // factory resolves paths relative to the eval package root (not cwd).
+    const registry = createRegistry(config.rootDir);
+    // Doc fetcher — provided by the registered preset's factory
+    const docFetcherFactory = registry.getDocFetcherFactory();
+    const docFetcher = docFetcherFactory ? docFetcherFactory() : undefined;
     // Eval runner — Promptfoo subprocess
     const evalRunner = new PromptfooEvalAdapter(config.rootDir);
     // Report store — Sanity Content Lake (for publish + auto-compare)
     const reportStore = createReportStore(config);
-    // Sinks — loaded from config/sinks.yaml
+    // Sinks — loaded from config/sinks
     const sinks = loadSinks();
     return {
         cache,
@@ -52,6 +58,7 @@ export function createAppContext(config) {
         docFetcher,
         evalRunner,
         logger,
+        registry,
         reportStore,
         sinks,
         taskSource,
@@ -113,6 +120,62 @@ function createTaskSource(config) {
     }
     return primary;
 }
+// ---------------------------------------------------------------------------
+// Built-in mode registrations for non-literacy modes
+// ---------------------------------------------------------------------------
+const BUILT_IN_MODES = [
+    {
+        id: "knowledge-probe",
+        label: "Knowledge Probe",
+        validProviderPatterns: ["^openai:", "^anthropic:", "^file://"],
+        rubricTemplateIds: [],
+        handlerModule: "./mode-handlers/knowledge-probe-handler.js",
+    },
+    {
+        id: "mcp-server",
+        label: "MCP Server Testing",
+        validProviderPatterns: ["^mcp:", "^file://"],
+        rubricTemplateIds: [
+            "mcp-input-validation",
+            "mcp-output-correctness",
+            "mcp-error-handling",
+        ],
+        handlerModule: "./mode-handlers/mcp-server-handler.js",
+    },
+    {
+        id: "agent-harness",
+        label: "Agent Harness",
+        validProviderPatterns: ["^openai:", "^anthropic:", "^file://"],
+        rubricTemplateIds: [],
+        handlerModule: "./mode-handlers/agent-harness-handler.js",
+    },
+];
+/**
+ * Build and populate the plugin registry.
+ *
+ * Preset registration flow:
+ * 1. A preset is a PresetDefinition — a bundle of modes, assertions, rubric
+ *    templates, prompt templates, scoring profiles, a doc fetcher factory,
+ *    source definitions, and feature definitions.
+ * 2. registerPreset() iterates the preset's fields and delegates each one to
+ *    the appropriate register method (registerMode, registerRubricTemplate, …).
+ * 3. After registration the rest of createAppContext() can pull capabilities
+ *    from the registry (e.g. getDocFetcherFactory()) without knowing which
+ *    preset provided them.
+ *
+ * To add a new preset: create a PresetDefinition, then call
+ * registry.registerPreset() here before the built-in mode registrations.
+ */
+function createRegistry(rootDir) {
+    const registry = new InMemoryPluginRegistry();
+    // Register the sanity-literacy preset — the Sanity-specific evaluation bundle.
+    registry.registerPreset(createSanityLiteracyPreset({ rootDir }));
+    // Register other built-in modes (not part of any preset yet)
+    for (const mode of BUILT_IN_MODES) {
+        registry.registerMode(mode);
+    }
+    return registry;
+}
 function createReportStore(config) {
     return new ReportStore({
         dataset: process.env.AILF_REPORT_DATASET ??

package/dist/orchestration/build-app-context.js CHANGED Viewed

@@ -20,6 +20,7 @@ export function mapToResolvedConfig(opts, rootDir) {
     return {
         rootDir,
         mode: opts.mode,
+        variant: opts.variant,
         noAutoScope: opts.noAutoScope ?? false,
         debug: opts.debug,
         areas: opts.areaOption

package/dist/orchestration/build-step-sequence.js CHANGED Viewed

@@ -5,7 +5,7 @@
  * PipelineStep objects determined by config flags like skipFetch,
  * skipEval, compareEnabled, etc.
  */
-import { FULL_MODE_SUBMODES } from "../_vendor/ailf-shared/index.js";
+import { LiteracyVariant } from "../pipeline/normalize-mode.js";
 import { CallbackStep } from "./steps/callback-step.js";
 import { CalculateScoresStep } from "./steps/calculate-scores-step.js";
 import { CompareStep } from "./steps/compare-step.js";
@@ -40,11 +40,29 @@ export function buildStepSequence(ctx, pipelineStart = Date.now()) {
     // Step 2: Generate Promptfoo configs
     steps.push(new GenerateConfigsStep());
     // Step 3: Run evaluation (steps handle --skip-eval internally)
-    const modes = config.mode === "full"
-        ? [...FULL_MODE_SUBMODES]
-        : [config.mode];
-    for (const mode of modes) {
-        steps.push(new RunEvalStep(mode));
+    //
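+    // For literacy mode, config.variant determines how many eval steps run:
+    //   "full" → baseline + agentic (LiteracyVariant.STANDARD, then LiteracyVariant.AGENTIC; two steps)
+    //   "baseline" / "agentic" / "observed" → one step for that variant
+    //   undefined → defaults to baseline (LiteracyVariant.STANDARD; one step)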
+    //
+    // For all other modes, one eval step per mode.
+    if (config.mode === "literacy") {
+        const variant = config.variant ?? LiteracyVariant.STANDARD;
+        if (variant === LiteracyVariant.FULL) {
+            for (const submode of [
+                LiteracyVariant.STANDARD,
+                LiteracyVariant.AGENTIC,
+            ]) {
+                steps.push(new RunEvalStep(submode));
+            }
+        }
+        else {
+            steps.push(new RunEvalStep(variant));
+        }
+    }
+    else {
+        steps.push(new RunEvalStep(config.mode));
     }
     // Step 3c: Grader consistency (optional, conditional)
     if (config.graderReplications) {