npm - @sanity/ailf - Versions diffs - 0.4.1 → 1.0.0 - Mend

@sanity/ailf 0.4.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290)

package/config/features.ts +23 -0
package/config/models.ts +83 -0
package/config/prompts.ts +16 -0
package/config/rubrics.ts +225 -0
package/config/schedules.ts +47 -0
package/config/sinks.ts +37 -0
package/config/sources.ts +21 -0
package/config/thresholds.ts +61 -0
package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
package/dist/_vendor/ailf-core/config-helpers.js +150 -0
package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
package/dist/_vendor/ailf-core/env-helper.js +45 -0
package/dist/_vendor/ailf-core/examples/index.d.ts +10 -10
package/dist/_vendor/ailf-core/examples/index.js +10 -10
package/dist/_vendor/ailf-core/index.d.ts +3 -0
package/dist/_vendor/ailf-core/index.js +5 -0
package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +32 -31
package/dist/_vendor/ailf-core/schemas/pipeline.js +52 -12
package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
package/dist/_vendor/ailf-core/services/index.js +2 -1
package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
package/dist/_vendor/ailf-core/services/scoring.js +25 -15
package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
package/dist/_vendor/ailf-core/types/index.js +8 -1
package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
package/dist/_vendor/ailf-core/types/trace.js +18 -0
package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
package/dist/_vendor/ailf-shared/index.d.ts +0 -1
package/dist/_vendor/ailf-shared/index.js +0 -1
package/dist/adapters/api-client/build-request.js +14 -13
package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
package/dist/adapters/config-sources/file-config-adapter.js +38 -12
package/dist/adapters/config-sources/index.d.ts +2 -0
package/dist/adapters/config-sources/index.js +1 -0
package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
package/dist/adapters/config-sources/ts-config-loader.js +133 -0
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
package/dist/adapters/task-sources/composite-task-source.js +1 -1
package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
package/dist/adapters/task-sources/index.d.ts +1 -0
package/dist/adapters/task-sources/index.js +1 -0
package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
package/dist/adapters/task-sources/repo-task-source.js +69 -16
package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
package/dist/adapters/task-sources/task-file-loader.js +83 -0
package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
package/dist/adapters/task-sources/yaml-task-source.js +19 -16
package/dist/cli.js +0 -2
package/dist/commands/baseline.js +4 -1
package/dist/commands/calculate-scores.js +1 -1
package/dist/commands/coverage-audit.js +7 -1
package/dist/commands/explain-handler.js +25 -23
package/dist/commands/fetch-docs.js +3 -2
package/dist/commands/generate-configs.js +1 -1
package/dist/commands/interactive.js +11 -7
package/dist/commands/pipeline-action.d.ts +2 -0
package/dist/commands/pipeline-action.js +16 -6
package/dist/commands/pipeline.d.ts +1 -0
package/dist/commands/pipeline.js +4 -2
package/dist/commands/pr-comment.js +1 -1
package/dist/commands/publish.js +2 -2
package/dist/commands/readiness-report.js +13 -6
package/dist/composition-root.d.ts +1 -1
package/dist/composition-root.js +67 -4
package/dist/orchestration/build-app-context.js +1 -0
package/dist/orchestration/build-step-sequence.js +24 -6
package/dist/orchestration/steps/calculate-scores-step.js +24 -11
package/dist/orchestration/steps/fetch-docs-step.js +6 -4
package/dist/orchestration/steps/gap-analysis-step.js +8 -7
package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
package/dist/orchestration/steps/generate-configs-step.js +245 -51
package/dist/orchestration/steps/grader-consistency-step.js +7 -4
package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
package/dist/orchestration/steps/readiness-step.js +5 -6
package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
package/dist/orchestration/steps/run-eval-step.js +8 -7
package/dist/pipeline/cache.d.ts +1 -1
package/dist/pipeline/cache.js +36 -8
package/dist/pipeline/calculate-scores.d.ts +5 -7
package/dist/pipeline/calculate-scores.js +74 -153
package/dist/pipeline/checks.js +2 -2
package/dist/pipeline/compare.js +8 -8
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
package/dist/pipeline/compiler/assertion-mapper.js +175 -0
package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
package/dist/pipeline/compiler/config-loader.d.ts +56 -0
package/dist/pipeline/compiler/config-loader.js +111 -0
package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
package/dist/pipeline/compiler/fixture-resolver.js +113 -0
package/dist/pipeline/compiler/hash.d.ts +11 -0
package/dist/pipeline/compiler/hash.js +18 -0
package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
package/dist/pipeline/compiler/ignore-fields.js +113 -0
package/dist/pipeline/compiler/index.d.ts +29 -0
package/dist/pipeline/compiler/index.js +45 -0
package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
package/dist/pipeline/compiler/literacy-bridge.js +172 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
package/dist/pipeline/compiler/presets/index.d.ts +9 -0
package/dist/pipeline/compiler/presets/index.js +8 -0
package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
package/dist/pipeline/compiler/provider-assembler.js +137 -0
package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
package/dist/pipeline/compiler/sandbox/index.js +11 -0
package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
package/dist/pipeline/compiler/scoring-bridge.js +114 -0
package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
package/dist/pipeline/compiler/task-graph-builder.js +291 -0
package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
package/dist/pipeline/compiler/telemetry/index.js +19 -0
package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
package/dist/pipeline/compiler/variable-resolver.js +115 -0
package/dist/pipeline/coverage-audit.d.ts +15 -5
package/dist/pipeline/coverage-audit.js +41 -22
package/dist/pipeline/eval-constants.d.ts +16 -6
package/dist/pipeline/eval-constants.js +25 -4
package/dist/pipeline/eval-fingerprint.d.ts +2 -2
package/dist/pipeline/eval-fingerprint.js +8 -9
package/dist/pipeline/expand-tasks.d.ts +23 -14
package/dist/pipeline/expand-tasks.js +37 -31
package/dist/pipeline/gap-analysis.d.ts +1 -1
package/dist/pipeline/gap-analysis.js +2 -2
package/dist/pipeline/generate-configs.d.ts +22 -4
package/dist/pipeline/generate-configs.js +53 -24
package/dist/pipeline/grader-api.d.ts +3 -3
package/dist/pipeline/grader-api.js +5 -12
package/dist/pipeline/grader-compare-runner.js +20 -27
package/dist/pipeline/grader-comparison.d.ts +4 -8
package/dist/pipeline/grader-comparison.js +11 -17
package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
package/dist/pipeline/grader-consistency-runner.js +18 -21
package/dist/pipeline/grader-consistency.d.ts +6 -10
package/dist/pipeline/grader-consistency.js +13 -32
package/dist/pipeline/grader-sensitivity-runner.js +7 -5
package/dist/pipeline/grader-sensitivity.d.ts +2 -6
package/dist/pipeline/grader-sensitivity.js +10 -10
package/dist/pipeline/grader-validate-runner.js +7 -5
package/dist/pipeline/grader-validation.d.ts +2 -6
package/dist/pipeline/grader-validation.js +14 -22
package/dist/pipeline/map-request-to-config.js +6 -1
package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
package/dist/pipeline/mirror-repo-tasks.js +16 -15
package/dist/pipeline/normalize-mode.d.ts +49 -0
package/dist/pipeline/normalize-mode.js +64 -0
package/dist/pipeline/plan.d.ts +5 -2
package/dist/pipeline/plan.js +134 -78
package/dist/pipeline/pr-comment.js +2 -0
package/dist/pipeline/profile-resolution.d.ts +47 -0
package/dist/pipeline/profile-resolution.js +91 -0
package/dist/pipeline/provenance.d.ts +2 -2
package/dist/pipeline/provenance.js +12 -17
package/dist/pipeline/release-report.js +4 -4
package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
package/dist/pipeline/repo-threshold-evaluator.js +1 -1
package/dist/pipeline/rubric-loader.d.ts +20 -0
package/dist/pipeline/rubric-loader.js +37 -0
package/dist/pipeline/validate.d.ts +4 -4
package/dist/pipeline/validate.js +64 -53
package/dist/schedules/loader.js +18 -8
package/dist/scripts/migrate-task-mode.d.ts +24 -0
package/dist/scripts/migrate-task-mode.js +85 -0
package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
package/dist/scripts/validate-task-sources.d.ts +1 -1
package/dist/scripts/validate-task-sources.js +15 -15
package/dist/sinks/loader.js +5 -7
package/dist/sources.d.ts +7 -7
package/dist/sources.js +22 -24
package/dist/webhook/dispatch.js +2 -1
package/package.json +6 -3
package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
package/tasks/literacy/frameworks.task.ts +128 -0
package/tasks/literacy/functions.task.ts +69 -0
package/tasks/literacy/groq.task.ts +258 -0
package/tasks/literacy/nextjs-live.task.ts +75 -0
package/tasks/literacy/studio-setup.task.ts +131 -0
package/tasks/literacy/visual-editing.task.ts +146 -0
package/config/features.yaml +0 -116
package/config/models.yaml +0 -116
package/config/prompts.yaml +0 -75
package/config/rubrics.yaml +0 -62
package/config/schedules.yaml +0 -43
package/config/sinks.yaml +0 -54
package/config/sources.yaml +0 -51
package/config/thresholds.yaml +0 -49
package/dist/agent-observer/test-imports.d.ts +0 -7
package/dist/agent-observer/test-imports.js +0 -185

package/dist/_vendor/ailf-core/schemas/eval-config.js CHANGED Viewed

@@ -10,6 +10,7 @@
  * (studio-eval-config) so Content Lake documents validate identically.
  */
 import { z } from "zod";
+import { RAW_EVAL_MODES } from "../../ailf-shared/index.js";
 export const EvalConfigSchema = z
     .object({
     /** Allowed origins for agentic mode */
@@ -46,8 +47,12 @@ export const EvalConfigSchema = z
     graderReplications: z.number().int().positive().optional(),
     /** Custom headers for doc fetching */
     headers: z.record(z.string(), z.string()).optional(),
-    /** Evaluation mode */
-    mode: z.enum(["baseline", "agentic", "observed", "full"]).optional(),
+    /**
+     * Evaluation mode — accepts both canonical and legacy names.
+     * Legacy names ("baseline", "agentic", "observed", "full") must pass
+     * through normalizeMode() before entering typed pipeline code.
+     */
+    mode: z.enum(RAW_EVAL_MODES).optional(),
     /** Disable release-aware auto-scoping */
     noAutoScope: z.boolean().optional(),
     /** Disable local cache */

package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts CHANGED Viewed

@@ -49,10 +49,15 @@ export declare const PipelineRequestSchema: z.ZodObject<{
     inlineTasks: z.ZodOptional<z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
     jobId: z.ZodOptional<z.ZodString>;
     mode: z.ZodOptional<z.ZodEnum<{
-        agentic: "agentic";
+        custom: "custom";
+        literacy: "literacy";
+        "mcp-server": "mcp-server";
+        "agent-harness": "agent-harness";
+        "knowledge-probe": "knowledge-probe";
         baseline: "baseline";
-        full: "full";
+        agentic: "agentic";
         observed: "observed";
+        full: "full";
     }>>;
     noAutoScope: z.ZodOptional<z.ZodBoolean>;
     noCache: z.ZodOptional<z.ZodBoolean>;
@@ -70,9 +75,9 @@ export declare const PipelineRequestSchema: z.ZodObject<{
     source: z.ZodOptional<z.ZodString>;
     sourceReportId: z.ZodOptional<z.ZodString>;
     taskMode: z.ZodOptional<z.ZodEnum<{
+        inline: "inline";
         "content-lake": "content-lake";
         yaml: "yaml";
-        inline: "inline";
     }>>;
     tasks: z.ZodOptional<z.ZodArray<z.ZodString>>;
     urls: z.ZodOptional<z.ZodArray<z.ZodString>>;

package/dist/_vendor/ailf-core/schemas/pipeline-request.js CHANGED Viewed

@@ -13,6 +13,7 @@
  * @see packages/eval/src/pipeline/map-request-to-config.ts — maps to ResolvedConfig
  */
 import { z } from "zod";
+import { RAW_EVAL_MODES } from "../../ailf-shared/index.js";
 // ---------------------------------------------------------------------------
 // Debug options — boolean shorthand or structured object
 // ---------------------------------------------------------------------------
@@ -69,7 +70,11 @@ export const PipelineRequestSchema = z.object({
     headers: z.record(z.string(), z.string()).optional(),
     inlineTasks: z.array(z.record(z.string(), z.unknown())).optional(),
     jobId: z.string().optional(),
-    mode: z.enum(["baseline", "agentic", "observed", "full"]).optional(),
+    /**
+     * Evaluation mode — accepts both canonical and legacy names.
+     * Legacy names must pass through normalizeMode() before entering typed pipeline code.
+     */
+    mode: z.enum(RAW_EVAL_MODES).optional(),
     noAutoScope: z.boolean().optional(),
     noCache: z.boolean().optional(),
     noRemoteCache: z.boolean().optional(),

package/dist/_vendor/ailf-core/schemas/pipeline.d.ts CHANGED Viewed

@@ -25,21 +25,37 @@ export declare const RubricTemplateSchema: z.ZodObject<{
 }, z.core.$strip>;
 /** Inferred TypeScript type for a rubric template. */
 export type RubricTemplate = z.infer<typeof RubricTemplateSchema>;
+/**
+ * A named weight profile — maps dimension names to weights (must sum to 1.0).
+ * Each profile is a self-contained scoring formula used for a specific
+ * (mode, variant) pair.
+ */
+declare const WeightProfileSchema: z.ZodRecord<z.ZodString, z.ZodNumber>;
+/** Inferred type for a single weight profile. */
+export type WeightProfile = z.infer<typeof WeightProfileSchema>;
 /**
  * Schema for the full config/rubrics.yaml config file.
  *
- * Each dimension is scored on a uniform 0–100 scale. The `weights` section
- * defines how dimensions are combined into a composite score (must sum to 1.0).
+ * Each dimension is scored on a uniform 0–100 scale. Named scoring profiles
+ * define how dimensions are combined into composite scores. Mode-profile
+ * bindings declare which profile to use for each (mode, variant) pair.
+ *
+ * Supports both the new `profiles` format and the legacy flat `weights`
+ * format for backward compatibility.
+ *
+ * @see docs/design-docs/named-scoring-profiles.md
  */
 export declare const RubricConfigSchema: z.ZodObject<{
     footer: z.ZodString;
+    "mode-profiles": z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodString, z.ZodRecord<z.ZodString, z.ZodString>]>>>>;
+    profiles: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodNumber>>>;
     templates: z.ZodRecord<z.ZodString, z.ZodObject<{
         criteria_label: z.ZodOptional<z.ZodNullable<z.ZodString>>;
         dimension: z.ZodOptional<z.ZodString>;
         header: z.ZodString;
         scale: z.ZodArray<z.ZodString>;
     }, z.core.$strip>>;
-    weights: z.ZodRecord<z.ZodString, z.ZodNumber>;
+    weights: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
 }, z.core.$strip>;
 /** Inferred TypeScript type for the rubrics config. */
 export type RubricConfig = z.infer<typeof RubricConfigSchema>;
@@ -51,17 +67,17 @@ export declare const FeatureSchema: z.ZodObject<{
     id: z.ZodString;
     name: z.ZodString;
     priority: z.ZodEnum<{
+        critical: "critical";
         high: "high";
-        low: "low";
         medium: "medium";
-        critical: "critical";
+        low: "low";
     }>;
     sections: z.ZodArray<z.ZodString>;
     status: z.ZodEnum<{
         covered: "covered";
-        "out-of-scope": "out-of-scope";
-        planned: "planned";
         uncovered: "uncovered";
+        planned: "planned";
+        "out-of-scope": "out-of-scope";
     }>;
     taskCount: z.ZodOptional<z.ZodNumber>;
 }, z.core.$strip>;
@@ -76,17 +92,17 @@ export declare const FeatureRegistrySchema: z.ZodObject<{
         id: z.ZodString;
         name: z.ZodString;
         priority: z.ZodEnum<{
+            critical: "critical";
             high: "high";
-            low: "low";
             medium: "medium";
-            critical: "critical";
+            low: "low";
         }>;
         sections: z.ZodArray<z.ZodString>;
         status: z.ZodEnum<{
             covered: "covered";
-            "out-of-scope": "out-of-scope";
-            planned: "planned";
             uncovered: "uncovered";
+            planned: "planned";
+            "out-of-scope": "out-of-scope";
         }>;
         taskCount: z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>>;
@@ -424,14 +440,11 @@ export declare const TaskFileSchema: z.ZodArray<z.ZodUnion<readonly [z.ZodObject
 export type TaskFile = z.infer<typeof TaskFileSchema>;
 /**
  * Schema for per-dimension threshold values.
+ * Uses a dynamic record to support all evaluation modes, not just literacy.
  * Keys use kebab-case to match YAML convention; the threshold engine
  * normalizes to camelCase for comparison against FeatureScore fields.
  */
-export declare const ThresholdDimensionsSchema: z.ZodObject<{
-    "code-correctness": z.ZodOptional<z.ZodNumber>;
-    "doc-coverage": z.ZodOptional<z.ZodNumber>;
-    "task-completion": z.ZodOptional<z.ZodNumber>;
-}, z.core.$strip>;
+export declare const ThresholdDimensionsSchema: z.ZodRecord<z.ZodString, z.ZodNumber>;
 /** Inferred TypeScript type for threshold dimension overrides. */
 export type ThresholdDimensions = z.infer<typeof ThresholdDimensionsSchema>;
 /**
@@ -441,11 +454,7 @@ export type ThresholdDimensions = z.infer<typeof ThresholdDimensionsSchema>;
 export declare const ThresholdDefaultsSchema: z.ZodObject<{
     ceiling: z.ZodOptional<z.ZodNumber>;
     composite: z.ZodNumber;
-    dimensions: z.ZodOptional<z.ZodObject<{
-        "code-correctness": z.ZodOptional<z.ZodNumber>;
-        "doc-coverage": z.ZodOptional<z.ZodNumber>;
-        "task-completion": z.ZodOptional<z.ZodNumber>;
-    }, z.core.$strip>>;
+    dimensions: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
     "doc-lift": z.ZodOptional<z.ZodNumber>;
 }, z.core.$strip>;
 /** Inferred TypeScript type for threshold defaults. */
@@ -485,21 +494,13 @@ export declare const ThresholdConfigSchema: z.ZodObject<{
     areas: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
         ceiling: z.ZodOptional<z.ZodOptional<z.ZodNumber>>;
         composite: z.ZodOptional<z.ZodNumber>;
-        dimensions: z.ZodOptional<z.ZodOptional<z.ZodObject<{
-            "code-correctness": z.ZodOptional<z.ZodNumber>;
-            "doc-coverage": z.ZodOptional<z.ZodNumber>;
-            "task-completion": z.ZodOptional<z.ZodNumber>;
-        }, z.core.$strip>>>;
+        dimensions: z.ZodOptional<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>>;
         "doc-lift": z.ZodOptional<z.ZodOptional<z.ZodNumber>>;
     }, z.core.$strip>>>;
     defaults: z.ZodObject<{
         ceiling: z.ZodOptional<z.ZodNumber>;
         composite: z.ZodNumber;
-        dimensions: z.ZodOptional<z.ZodObject<{
-            "code-correctness": z.ZodOptional<z.ZodNumber>;
-            "doc-coverage": z.ZodOptional<z.ZodNumber>;
-            "task-completion": z.ZodOptional<z.ZodNumber>;
-        }, z.core.$strip>>;
+        dimensions: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
         "doc-lift": z.ZodOptional<z.ZodNumber>;
     }, z.core.$strip>;
     regression: z.ZodOptional<z.ZodObject<{

package/dist/_vendor/ailf-core/schemas/pipeline.js CHANGED Viewed

@@ -31,23 +31,66 @@ export const RubricTemplateSchema = z.object({
         .array(z.string().min(1))
         .min(1, "scale must have at least one entry"),
 });
+/**
+ * A named weight profile — maps dimension names to weights (must sum to 1.0).
+ * Each profile is a self-contained scoring formula used for a specific
+ * (mode, variant) pair.
+ */
+const WeightProfileSchema = z
+    .record(z.string(), z.number().min(0).max(1))
+    .refine((w) => {
+    const sum = Object.values(w).reduce((s, v) => s + v, 0);
+    return Math.abs(sum - 1.0) < 0.001;
+}, { message: "profile weights must sum to 1.0" });
+/**
+ * Mode-to-profile bindings — maps (mode, perspective) pairs to profile names.
+ *
+ * Flat form (most modes):
+ *   { "mcp-server": { gold: "mcp-behavior" } }
+ *
+ * Nested form (literacy mode with variant sub-keys):
+ *   { literacy: { baseline: { gold: "default", baseline: "output-only" }, agentic: { gold: "default" } } }
+ *
+ * The nested form adds a variant level between mode and perspective,
+ * allowing a single canonical mode to host multiple scoring variants.
+ */
+const ModeProfileEntrySchema = z.union([
+    z.string(),
+    z.record(z.string(), z.string()),
+]);
+const ModeProfilesSchema = z.record(z.string(), z.record(z.string(), ModeProfileEntrySchema));
 /**
  * Schema for the full config/rubrics.yaml config file.
  *
- * Each dimension is scored on a uniform 0–100 scale. The `weights` section
- * defines how dimensions are combined into a composite score (must sum to 1.0).
+ * Each dimension is scored on a uniform 0–100 scale. Named scoring profiles
+ * define how dimensions are combined into composite scores. Mode-profile
+ * bindings declare which profile to use for each (mode, variant) pair.
+ *
+ * Supports both the new `profiles` format and the legacy flat `weights`
+ * format for backward compatibility.
+ *
+ * @see docs/design-docs/named-scoring-profiles.md
  */
-export const RubricConfigSchema = z.object({
+export const RubricConfigSchema = z
+    .object({
     footer: z.string().min(1, "footer must be a non-empty string"),
+    "mode-profiles": ModeProfilesSchema.optional(),
+    profiles: z
+        .record(z.string(), WeightProfileSchema)
+        .refine((p) => "default" in p, {
+        message: "profiles must include a 'default' profile",
+    })
+        .optional(),
     templates: z
         .record(z.string(), RubricTemplateSchema)
         .refine((t) => Object.keys(t).length > 0, {
         message: "templates must have at least one entry",
     }),
-    weights: z.record(z.string(), z.number().min(0).max(1)).refine((w) => {
-        const sum = Object.values(w).reduce((s, v) => s + v, 0);
-        return Math.abs(sum - 1.0) < 0.001;
-    }, { message: "weights must sum to 1.0" }),
+    // Legacy: flat weight map. Treated as a single profile named "default".
+    weights: WeightProfileSchema.optional(),
+})
+    .refine((c) => c.profiles !== undefined || c.weights !== undefined, {
+    message: "rubrics.yaml must have either 'profiles' or 'weights'",
 });
 // ---------------------------------------------------------------------------
 // Feature registry schema — validates config/features.yaml (Phase 3c)
@@ -246,14 +289,11 @@ export const TaskFileSchema = z
 // ---------------------------------------------------------------------------
 /**
  * Schema for per-dimension threshold values.
+ * Uses a dynamic record to support all evaluation modes, not just literacy.
  * Keys use kebab-case to match YAML convention; the threshold engine
  * normalizes to camelCase for comparison against FeatureScore fields.
  */
-export const ThresholdDimensionsSchema = z.object({
-    "code-correctness": z.number().min(0).max(100).optional(),
-    "doc-coverage": z.number().min(0).max(100).optional(),
-    "task-completion": z.number().min(0).max(100).optional(),
-});
+export const ThresholdDimensionsSchema = z.record(z.string(), z.number().min(0).max(100));
 /**
  * Schema for threshold defaults (and per-area overrides).
  * All fields are optional in per-area overrides; defaults must have composite.

package/dist/_vendor/ailf-core/schemas/schedules.d.ts CHANGED Viewed

@@ -18,10 +18,15 @@ export declare const ScheduleEntrySchema: z.ZodObject<{
     cron: z.ZodString;
     enabled: z.ZodDefault<z.ZodBoolean>;
     mode: z.ZodDefault<z.ZodEnum<{
-        agentic: "agentic";
+        custom: "custom";
+        literacy: "literacy";
+        "mcp-server": "mcp-server";
+        "agent-harness": "agent-harness";
+        "knowledge-probe": "knowledge-probe";
         baseline: "baseline";
-        full: "full";
+        agentic: "agentic";
         observed: "observed";
+        full: "full";
     }>>;
     name: z.ZodString;
     publish: z.ZodDefault<z.ZodBoolean>;
@@ -53,10 +58,15 @@ export declare const SchedulesFileSchema: z.ZodObject<{
         cron: z.ZodString;
         enabled: z.ZodDefault<z.ZodBoolean>;
         mode: z.ZodDefault<z.ZodEnum<{
-            agentic: "agentic";
+            custom: "custom";
+            literacy: "literacy";
+            "mcp-server": "mcp-server";
+            "agent-harness": "agent-harness";
+            "knowledge-probe": "knowledge-probe";
             baseline: "baseline";
-            full: "full";
+            agentic: "agentic";
             observed: "observed";
+            full: "full";
         }>>;
         name: z.ZodString;
         publish: z.ZodDefault<z.ZodBoolean>;

package/dist/_vendor/ailf-core/schemas/schedules.js CHANGED Viewed

@@ -11,6 +11,7 @@
  * @see docs/design-docs/report-store/implementation.md — Phase 5
  */
 import { z } from "zod";
+import { RAW_EVAL_MODES } from "../../ailf-shared/index.js";
 // ---------------------------------------------------------------------------
 // Cron expression validation
 // ---------------------------------------------------------------------------
@@ -34,8 +35,11 @@ export const ScheduleEntrySchema = z.object({
     cron: CronSchema,
     /** Whether this schedule is active */
     enabled: z.boolean().default(true),
-    /** Evaluation mode */
-    mode: z.enum(["agentic", "baseline", "full", "observed"]).default("baseline"),
+    /**
+     * Evaluation mode — accepts both canonical and legacy names.
+     * Legacy names must pass through normalizeMode() before entering typed pipeline code.
+     */
+    mode: z.enum(RAW_EVAL_MODES).default("baseline"),
     /** Human-readable schedule name (used as report tag) */
     name: z
         .string()

package/dist/_vendor/ailf-core/schemas/sinks.d.ts CHANGED Viewed

@@ -17,10 +17,10 @@
 import { z } from "zod";
 /** All supported sink types as a Zod enum. */
 export declare const SinkTypeSchema: z.ZodEnum<{
-    webhook: "webhook";
     bigquery: "bigquery";
     "github-comment": "github-comment";
     slack: "slack";
+    webhook: "webhook";
 }>;
 /** Supported sink type string literal union. */
 export type SinkType = z.infer<typeof SinkTypeSchema>;

package/dist/_vendor/ailf-core/services/comparison-formatters.js CHANGED Viewed

@@ -25,12 +25,21 @@ export function formatComparisonMarkdown(report) {
     lines.push("");
     lines.push(`**Overall: ${Math.round(report.baseline.overall.avgScore)} → ${Math.round(report.experiment.overall.avgScore)}** (${overallIcon} ${deltaStr(overall)})`);
     lines.push("");
-    // Per-area table
-    lines.push("| Feature | Baseline | Current | Delta | Task | Code | Docs |");
-    lines.push("|---------|----------|---------|-------|------|------|------|");
+    // Derive dimension columns from the first area's keys (all areas share the
+    // same scoring profile, so the key set is uniform).
+    const dimKeys = report.areas.length > 0
+        ? Object.keys(report.areas[0].dimensions)
+        : Object.keys(report.deltas.perDimension);
+    // Per-area table — columns are dynamic
+    const dimHeaders = dimKeys.map(kebabToTitleCase);
+    const headerRow = ["Feature", "Baseline", "Current", "Delta", ...dimHeaders];
+    const separatorRow = headerRow.map(() => "------");
+    lines.push(`| ${headerRow.join(" | ")} |`);
+    lines.push(`|${separatorRow.join("|")}|`);
     for (const a of report.areas) {
         const icon = changeIcon(a.change);
-        lines.push(`| ${a.area} | ${a.baseline} | ${a.experiment} | ${icon} ${deltaStr(a.delta)} | ${deltaStr(a.dimensions.taskCompletion.delta)} | ${deltaStr(a.dimensions.codeCorrectness.delta)} | ${deltaStr(a.dimensions.docCoverage.delta)} |`);
+        const dimCells = dimKeys.map((k) => deltaStr(a.dimensions[k]?.delta ?? 0));
+        lines.push(`| ${a.area} | ${a.baseline} | ${a.experiment} | ${icon} ${deltaStr(a.delta)} | ${dimCells.join(" | ")} |`);
     }
     lines.push("");
     // Summary
@@ -55,9 +64,9 @@ export function formatComparisonMarkdown(report) {
     const dim = report.deltas.perDimension;
     lines.push("| Dimension | Delta |");
     lines.push("|-----------|-------|");
-    lines.push(`| Task Completion | ${deltaStr(dim.taskCompletion)} |`);
-    lines.push(`| Code Correctness | ${deltaStr(dim.codeCorrectness)} |`);
-    lines.push(`| Doc Coverage | ${deltaStr(dim.docCoverage)} |`);
+    for (const k of Object.keys(dim)) {
+        lines.push(`| ${kebabToTitleCase(k)} | ${deltaStr(dim[k])} |`);
+    }
     lines.push(`| Doc Lift | ${deltaStr(report.deltas.docLift)} |`);
     if (report.deltas.cost !== undefined) {
         const costStr = report.deltas.cost > 0
@@ -91,29 +100,51 @@ export function formatComparisonTable(report) {
             : "unchanged");
     lines.push(`  Overall: ${Math.round(report.baseline.overall.avgScore)} → ${Math.round(report.experiment.overall.avgScore)} (${overallIcon} ${deltaStr(overall)})`);
     lines.push("");
-    // Per-dimension averages
+    // Per-dimension averages — derived dynamically from the report
     const dim = report.deltas.perDimension;
+    const dimKeys = report.areas.length > 0
+        ? Object.keys(report.areas[0].dimensions)
+        : Object.keys(dim);
     lines.push("  Dimension averages:");
-    lines.push(`    Task Completion:  ${deltaStr(dim.taskCompletion)}`);
-    lines.push(`    Code Correctness: ${deltaStr(dim.codeCorrectness)}`);
-    lines.push(`    Doc Coverage:     ${deltaStr(dim.docCoverage)}`);
-    lines.push(`    Doc Lift:         ${deltaStr(report.deltas.docLift)}`);
+    // Pad labels to the longest dimension label for alignment
+    const dimLabels = dimKeys.map(kebabToTitleCase);
+    // +1 for the colon appended to each label
+    const maxLabelLen = Math.max(...dimLabels.map((l) => l.length + 1), "Doc Lift:".length);
+    for (let i = 0; i < dimKeys.length; i++) {
+        lines.push(`    ${(dimLabels[i] + ":").padEnd(maxLabelLen)} ${deltaStr(dim[dimKeys[i]] ?? 0)}`);
+    }
+    lines.push(`    ${"Doc Lift:".padEnd(maxLabelLen)} ${deltaStr(report.deltas.docLift)}`);
     if (report.deltas.cost !== undefined) {
-        lines.push(`    Cost:             ${report.deltas.cost > 0 ? "+" : ""}$${report.deltas.cost.toFixed(4)}`);
+        lines.push(`    ${"Cost:".padEnd(maxLabelLen)} ${report.deltas.cost > 0 ? "+" : ""}$${report.deltas.cost.toFixed(4)}`);
     }
     lines.push("");
-    // Per-area table
+    // Per-area table — columns are dynamic
     lines.push("-".repeat(80));
     lines.push("PER-AREA BREAKDOWN");
     lines.push("-".repeat(80));
     lines.push("");
-    const h = "| Feature Area        | Baseline | Experiment | Delta | Task | Code | Docs |";
-    const sep = "|---------------------|----------|------------|-------|------|------|------|";
-    lines.push(h);
-    lines.push(sep);
+    const dimHeaders = dimKeys.map(kebabToTitleCase);
+    const colWidths = dimHeaders.map((h) => Math.max(h.length, 4));
+    const hCols = [
+        "Feature Area".padEnd(19),
+        "Baseline".padStart(8),
+        "Experiment".padStart(10),
+        "Delta".padStart(5),
+        ...dimHeaders.map((h, i) => h.padStart(colWidths[i])),
+    ];
+    const sepCols = [
+        "-".repeat(21),
+        "-".repeat(10),
+        "-".repeat(12),
+        "-".repeat(7),
+        ...colWidths.map((w) => "-".repeat(w + 2)),
+    ];
+    lines.push(`| ${hCols.join(" | ")} |`);
+    lines.push(`|${sepCols.join("|")}|`);
     for (const a of report.areas) {
         const icon = changeIcon(a.change);
-        lines.push(`| ${icon} ${a.area.padEnd(17)} | ${String(a.baseline).padStart(8)} | ${String(a.experiment).padStart(10)} | ${deltaStr(a.delta).padStart(5)} | ${deltaStr(a.dimensions.taskCompletion.delta).padStart(4)} | ${deltaStr(a.dimensions.codeCorrectness.delta).padStart(4)} | ${deltaStr(a.dimensions.docCoverage.delta).padStart(4)} |`);
+        const dimCells = dimKeys.map((k, i) => deltaStr(a.dimensions[k]?.delta ?? 0).padStart(colWidths[i]));
+        lines.push(`| ${icon} ${a.area.padEnd(17)} | ${String(a.baseline).padStart(8)} | ${String(a.experiment).padStart(10)} | ${deltaStr(a.delta).padStart(5)} | ${dimCells.join(" | ")} |`);
     }
     lines.push("");
     // Classification summary
@@ -187,3 +218,10 @@ function deltaStr(d) {
         return `${Math.round(d)}`;
     return "0";
 }
+/**
+ * Convert kebab-case dimension name to title case (e.g. 'task-completion' → 'Task Completion').
+ *
+ * The name is split on "-", the first character of each segment is
+ * upper-cased, and the segments are joined with single spaces. Characters
+ * after the first in each segment are left exactly as given (nothing is
+ * lower-cased). An empty segment — from a leading, trailing, or doubled
+ * hyphen — stays empty, so it appears as an extra space in the result.
+ *
+ * In this file it is used as a `.map()` callback over dimension keys obtained
+ * from `Object.keys(...)`; only the first argument (the key) is read.
+ *
+ * @param name - Dimension key to convert; `.split` is called on it, so it must be a string.
+ * @returns The space-separated label with each segment's first character upper-cased.
+ */
+function kebabToTitleCase(name) {
+    return name
+        .split("-")
+        .map((w) => w.charAt(0).toUpperCase() + w.slice(1))
+        .join(" ");
+}

package/dist/_vendor/ailf-core/services/index.d.ts CHANGED Viewed

@@ -7,6 +7,7 @@
  * Extracted from packages/eval/src/lib/ during the Ports & Adapters
  * migration (Phase 4e).
  */
-export { classifyRubric, detectFeatureArea, extractUrlMetadata, mergeScores, parseRubricScore, } from "./scoring.js";
+export { classifyRubric, detectFeatureArea, extractDimensions, extractUrlMetadata, mergeScores, parseRubricScore, } from "./scoring.js";
 export { formatComparisonMarkdown, formatComparisonTable, } from "./comparison-formatters.js";
+export { aggregateAreas, aggregateDimensions, computeEnsembleScore, computeTaskScore, normalizeScore, type AggregationStrategy, type AreaScore, type AssertionScore, type DimensionScore, type EnsembleGradingConfig, type GraderTransitionConfig, type TaskScore, type TaskScoreOptions, } from "./scoring-engine.js";
 export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, } from "./config-helpers.js";

package/dist/_vendor/ailf-core/services/index.js CHANGED Viewed

@@ -7,6 +7,7 @@
  * Extracted from packages/eval/src/lib/ during the Ports & Adapters
  * migration (Phase 4e).
  */
-export { classifyRubric, detectFeatureArea, extractUrlMetadata, mergeScores, parseRubricScore, } from "./scoring.js";
+export { classifyRubric, detectFeatureArea, extractDimensions, extractUrlMetadata, mergeScores, parseRubricScore, } from "./scoring.js";
 export { formatComparisonMarkdown, formatComparisonTable, } from "./comparison-formatters.js";
+export { aggregateAreas, aggregateDimensions, computeEnsembleScore, computeTaskScore, normalizeScore, } from "./scoring-engine.js";
 export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, } from "./config-helpers.js";