npm - @sanity/ailf - Versions diffs - 0.5.0 → 1.0.0 - Mend

@sanity/ailf 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (288)

package/config/features.ts +23 -0
package/config/models.ts +83 -0
package/config/prompts.ts +16 -0
package/config/rubrics.ts +225 -0
package/config/schedules.ts +47 -0
package/config/sinks.ts +37 -0
package/config/sources.ts +21 -0
package/config/thresholds.ts +61 -0
package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
package/dist/_vendor/ailf-core/config-helpers.js +150 -0
package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
package/dist/_vendor/ailf-core/env-helper.js +45 -0
package/dist/_vendor/ailf-core/index.d.ts +3 -0
package/dist/_vendor/ailf-core/index.js +5 -0
package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -29
package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -8
package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
package/dist/_vendor/ailf-core/services/index.js +2 -1
package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
package/dist/_vendor/ailf-core/services/scoring.js +25 -15
package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
package/dist/_vendor/ailf-core/types/index.js +8 -1
package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
package/dist/_vendor/ailf-core/types/trace.js +18 -0
package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
package/dist/_vendor/ailf-shared/index.d.ts +0 -1
package/dist/_vendor/ailf-shared/index.js +0 -1
package/dist/adapters/api-client/build-request.js +14 -13
package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
package/dist/adapters/config-sources/file-config-adapter.js +38 -12
package/dist/adapters/config-sources/index.d.ts +2 -0
package/dist/adapters/config-sources/index.js +1 -0
package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
package/dist/adapters/config-sources/ts-config-loader.js +133 -0
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
package/dist/adapters/task-sources/composite-task-source.js +1 -1
package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
package/dist/adapters/task-sources/index.d.ts +1 -0
package/dist/adapters/task-sources/index.js +1 -0
package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
package/dist/adapters/task-sources/repo-task-source.js +69 -16
package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
package/dist/adapters/task-sources/task-file-loader.js +83 -0
package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
package/dist/adapters/task-sources/yaml-task-source.js +19 -16
package/dist/cli.js +0 -2
package/dist/commands/baseline.js +4 -1
package/dist/commands/calculate-scores.js +1 -1
package/dist/commands/coverage-audit.js +7 -1
package/dist/commands/explain-handler.js +25 -23
package/dist/commands/fetch-docs.js +3 -2
package/dist/commands/generate-configs.js +1 -1
package/dist/commands/interactive.js +11 -7
package/dist/commands/pipeline-action.d.ts +2 -0
package/dist/commands/pipeline-action.js +16 -6
package/dist/commands/pipeline.d.ts +1 -0
package/dist/commands/pipeline.js +4 -2
package/dist/commands/pr-comment.js +1 -1
package/dist/commands/publish.js +2 -2
package/dist/commands/readiness-report.js +13 -6
package/dist/composition-root.d.ts +1 -1
package/dist/composition-root.js +67 -4
package/dist/orchestration/build-app-context.js +1 -0
package/dist/orchestration/build-step-sequence.js +24 -6
package/dist/orchestration/steps/calculate-scores-step.js +24 -11
package/dist/orchestration/steps/fetch-docs-step.js +6 -4
package/dist/orchestration/steps/gap-analysis-step.js +8 -7
package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
package/dist/orchestration/steps/generate-configs-step.js +245 -51
package/dist/orchestration/steps/grader-consistency-step.js +7 -4
package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
package/dist/orchestration/steps/readiness-step.js +5 -6
package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
package/dist/orchestration/steps/run-eval-step.js +8 -7
package/dist/pipeline/cache.d.ts +1 -1
package/dist/pipeline/cache.js +36 -8
package/dist/pipeline/calculate-scores.d.ts +2 -4
package/dist/pipeline/calculate-scores.js +43 -113
package/dist/pipeline/checks.js +2 -2
package/dist/pipeline/compare.js +8 -8
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
package/dist/pipeline/compiler/assertion-mapper.js +175 -0
package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
package/dist/pipeline/compiler/config-loader.d.ts +56 -0
package/dist/pipeline/compiler/config-loader.js +111 -0
package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
package/dist/pipeline/compiler/fixture-resolver.js +113 -0
package/dist/pipeline/compiler/hash.d.ts +11 -0
package/dist/pipeline/compiler/hash.js +18 -0
package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
package/dist/pipeline/compiler/ignore-fields.js +113 -0
package/dist/pipeline/compiler/index.d.ts +29 -0
package/dist/pipeline/compiler/index.js +45 -0
package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
package/dist/pipeline/compiler/literacy-bridge.js +172 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
package/dist/pipeline/compiler/presets/index.d.ts +9 -0
package/dist/pipeline/compiler/presets/index.js +8 -0
package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
package/dist/pipeline/compiler/provider-assembler.js +137 -0
package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
package/dist/pipeline/compiler/sandbox/index.js +11 -0
package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
package/dist/pipeline/compiler/scoring-bridge.js +114 -0
package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
package/dist/pipeline/compiler/task-graph-builder.js +291 -0
package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
package/dist/pipeline/compiler/telemetry/index.js +19 -0
package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
package/dist/pipeline/compiler/variable-resolver.js +115 -0
package/dist/pipeline/coverage-audit.d.ts +15 -5
package/dist/pipeline/coverage-audit.js +41 -22
package/dist/pipeline/eval-constants.d.ts +16 -6
package/dist/pipeline/eval-constants.js +25 -4
package/dist/pipeline/eval-fingerprint.d.ts +2 -2
package/dist/pipeline/eval-fingerprint.js +8 -9
package/dist/pipeline/expand-tasks.d.ts +19 -10
package/dist/pipeline/expand-tasks.js +34 -28
package/dist/pipeline/gap-analysis.d.ts +1 -1
package/dist/pipeline/gap-analysis.js +2 -2
package/dist/pipeline/generate-configs.d.ts +22 -4
package/dist/pipeline/generate-configs.js +53 -24
package/dist/pipeline/grader-api.d.ts +3 -3
package/dist/pipeline/grader-api.js +5 -12
package/dist/pipeline/grader-compare-runner.js +20 -27
package/dist/pipeline/grader-comparison.d.ts +4 -8
package/dist/pipeline/grader-comparison.js +11 -17
package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
package/dist/pipeline/grader-consistency-runner.js +16 -20
package/dist/pipeline/grader-consistency.d.ts +6 -10
package/dist/pipeline/grader-consistency.js +13 -32
package/dist/pipeline/grader-sensitivity-runner.js +7 -5
package/dist/pipeline/grader-sensitivity.d.ts +2 -6
package/dist/pipeline/grader-sensitivity.js +10 -10
package/dist/pipeline/grader-validate-runner.js +7 -5
package/dist/pipeline/grader-validation.d.ts +2 -6
package/dist/pipeline/grader-validation.js +14 -22
package/dist/pipeline/map-request-to-config.js +6 -1
package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
package/dist/pipeline/mirror-repo-tasks.js +16 -15
package/dist/pipeline/normalize-mode.d.ts +49 -0
package/dist/pipeline/normalize-mode.js +64 -0
package/dist/pipeline/plan.d.ts +5 -2
package/dist/pipeline/plan.js +134 -78
package/dist/pipeline/pr-comment.js +2 -0
package/dist/pipeline/profile-resolution.d.ts +22 -14
package/dist/pipeline/profile-resolution.js +41 -19
package/dist/pipeline/provenance.d.ts +2 -2
package/dist/pipeline/provenance.js +12 -17
package/dist/pipeline/release-report.js +4 -4
package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
package/dist/pipeline/repo-threshold-evaluator.js +1 -1
package/dist/pipeline/rubric-loader.d.ts +20 -0
package/dist/pipeline/rubric-loader.js +37 -0
package/dist/pipeline/validate.d.ts +4 -4
package/dist/pipeline/validate.js +64 -53
package/dist/schedules/loader.js +18 -8
package/dist/scripts/migrate-task-mode.d.ts +24 -0
package/dist/scripts/migrate-task-mode.js +85 -0
package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
package/dist/scripts/validate-task-sources.d.ts +1 -1
package/dist/scripts/validate-task-sources.js +15 -15
package/dist/sinks/loader.js +5 -7
package/dist/sources.d.ts +7 -7
package/dist/sources.js +22 -24
package/dist/webhook/dispatch.js +2 -1
package/package.json +6 -3
package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
package/tasks/literacy/frameworks.task.ts +128 -0
package/tasks/literacy/functions.task.ts +69 -0
package/tasks/literacy/groq.task.ts +258 -0
package/tasks/literacy/nextjs-live.task.ts +75 -0
package/tasks/literacy/studio-setup.task.ts +131 -0
package/tasks/literacy/visual-editing.task.ts +146 -0
package/config/features.yaml +0 -116
package/config/models.yaml +0 -116
package/config/prompts.yaml +0 -75
package/config/rubrics.yaml +0 -81
package/config/schedules.yaml +0 -43
package/config/sinks.yaml +0 -54
package/config/sources.yaml +0 -51
package/config/thresholds.yaml +0 -49
package/dist/agent-observer/test-imports.d.ts +0 -7
package/dist/agent-observer/test-imports.js +0 -185

package/dist/pipeline/compiler/scoring-bridge.d.ts ADDED Viewed

@@ -0,0 +1,49 @@
+/**
+ * scoring-bridge.ts — Bridge between Promptfoo raw results and the
+ * 4-tier scoring engine.
+ *
+ * Converts Promptfoo `ComponentResult[]` (from test results) into the
+ * scoring engine's `AssertionScore[]` format, then delegates aggregation
+ * to `aggregateDimensions` and `computeTaskScore` from core.
+ *
+ * This bridge replaces the three legacy scoring primitives in
+ * `calculate-scores.ts`:
+ *   - `accumulateDimensions`  → `convertToAssertionScores` + `aggregateDimensions`
+ *   - `averageDimensions`     → (handled internally by `aggregateDimensions`)
+ *   - `weightedComposite`     → `computeTaskScore`
+ *
+ * The bridge preserves the existing 0–100 output scale. The 4-tier
+ * engine works in [0, 1]; this module handles the conversion at
+ * boundaries.
+ *
+ * @see packages/core/src/services/scoring-engine.ts — the 4-tier engine
+ * @see packages/eval/src/pipeline/calculate-scores.ts — the consumer
+ * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
+ */
+import { type DimensionScore } from "../../_vendor/ailf-core/index.d.ts";
+import type { TestResult } from "../../_vendor/ailf-core/index.d.ts";
+/** Result returned by `scoreTestGroup`: legacy 0–100 scores plus the engine's raw dimension output */
+export interface BridgedScoreResult {
+    /** Per-dimension scores keyed by camelCase dimension name, rounded to integers (0–100 scale) */
+    dimensions: Record<string, number>;
+    /** Weighted composite score, rounded to an integer (0–100 scale) */
+    composite: number;
+    /** Sum of the `cost` field over every test in the group */
+    totalCost: number;
+    /** Unrounded DimensionScore objects as returned by the engine (0–1 scale) */
+    rawDimensions: DimensionScore[];
+}
+/**
+ * Score a group of test results using the 4-tier scoring engine.
+ *
+ * Only `llm-rubric` assertion components contribute to the scores. This
+ * replaces the legacy `accumulateDimensions → averageDimensions →
+ * weightedComposite` chain with `aggregateDimensions → computeTaskScore`.
+ *
+ * @param tests    Pre-filtered test results (e.g., all gold or all baseline)
+ * @param profile  Weight profile mapping kebab-case dimension names to weights
+ *                 (e.g., `{ "task-completion": 0.4, "code-correctness": 0.35, "doc-coverage": 0.25 }`)
+ * @param taskId   Optional task identifier passed to the engine for traceability; falls back to "aggregate"
+ * @returns        camelCase-keyed dimensions and composite (both 0–100), plus raw engine dimensions and total cost
+ */
+export declare function scoreTestGroup(tests: TestResult[], profile: Record<string, number>, taskId?: string): BridgedScoreResult;

package/dist/pipeline/compiler/scoring-bridge.js ADDED Viewed

@@ -0,0 +1,114 @@
+/**
+ * scoring-bridge.ts — Bridge between Promptfoo raw results and the
+ * 4-tier scoring engine.
+ *
+ * Converts Promptfoo `ComponentResult[]` (from test results) into the
+ * scoring engine's `AssertionScore[]` format, then delegates aggregation
+ * to `aggregateDimensions` and `computeTaskScore` from core.
+ *
+ * This bridge replaces the three legacy scoring primitives in
+ * `calculate-scores.ts`:
+ *   - `accumulateDimensions`  → `convertToAssertionScores` + `aggregateDimensions`
+ *   - `averageDimensions`     → (handled internally by `aggregateDimensions`)
+ *   - `weightedComposite`     → `computeTaskScore`
+ *
+ * The bridge preserves the existing 0–100 output scale. The 4-tier
+ * engine works in [0, 1]; this module handles the conversion at
+ * boundaries.
+ *
+ * @see packages/core/src/services/scoring-engine.ts — the 4-tier engine
+ * @see packages/eval/src/pipeline/calculate-scores.ts — the consumer
+ * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
+ */
+import { aggregateDimensions, computeTaskScore, normalizeScore, } from "../../_vendor/ailf-core/index.js";
+import { classifyRubric, parseRubricScore } from "../../_vendor/ailf-core/index.js";
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+/**
+ * Score a group of test results using the 4-tier scoring engine.
+ *
+ * Only `llm-rubric` assertion components contribute to the scores. This
+ * replaces the legacy `accumulateDimensions → averageDimensions →
+ * weightedComposite` chain with `aggregateDimensions → computeTaskScore`.
+ *
+ * @param tests    Pre-filtered test results; each is read for `cost` and `gradingResult.componentResults`
+ * @param profile  Weight profile mapping kebab-case dimension names to weights
+ *                 (e.g., `{ "task-completion": 0.4, "code-correctness": 0.35, "doc-coverage": 0.25 }`)
+ * @param taskId   Optional task identifier passed to the engine for traceability; falls back to "aggregate"
+ * @returns        camelCase-keyed dimensions and composite (both 0–100), plus raw engine dimensions and total cost
+ */
+export function scoreTestGroup(tests, profile, taskId) {
+    let totalCost = 0;
+    // Step 1: Sum test costs and convert llm-rubric ComponentResults into AssertionScore[] (0–1 scale); null conversions are dropped
+    const assertionScores = [];
+    for (const test of tests) {
+        totalCost += test.cost;
+        for (const comp of test.gradingResult.componentResults) {
+            if (comp.assertion?.type !== "llm-rubric")
+                continue;
+            const converted = componentToAssertionScore(comp);
+            if (converted)
+                assertionScores.push(converted);
+        }
+    }
+    // Step 2: Aggregate into DimensionScores (0–1 scale) with "mean" as the default aggregation and display labels for three dimensions
+    const dimensionLabels = {
+        "code-correctness": "Code Correctness",
+        "doc-coverage": "Doc Coverage",
+        "task-completion": "Task Completion",
+    };
+    const rawDimensions = aggregateDimensions(assertionScores, {
+        defaultAggregation: "mean",
+        dimensionLabels,
+    });
+    // Step 3: Compute weighted composite via TaskScore (0–1 scale), weighting dimensions by the caller's profile
+    const taskScoreResult = computeTaskScore(rawDimensions, {
+        taskId: taskId ?? "aggregate",
+        weights: profile,
+        weightSource: "scoring-bridge",
+    });
+    // Step 4: Convert back to a rounded 0–100 scale for legacy compatibility
+    const dimensions = {};
+    for (const dim of rawDimensions) {
+        // Legacy consumers expect camelCase keys, while the engine reports kebab-case dimension IDs
+        const camelKey = kebabToCamel(dim.dimensionId);
+        dimensions[camelKey] = Math.round(dim.score * 100);
+    }
+    return {
+        composite: Math.round(taskScoreResult.score * 100),
+        dimensions,
+        rawDimensions,
+        totalCost,
+    };
+}
+// ---------------------------------------------------------------------------
+// Conversion helpers
+// ---------------------------------------------------------------------------
+/**
+ * Convert a single Promptfoo ComponentResult into the scoring engine's
+ * AssertionScore format, with `weight` fixed at 1.0 and `latencyMs` at 0.
+ *
+ * @param comp  Component result; read for `assertion.type`, `pass` and `reason` (defaults to "")
+ * @returns     The AssertionScore, or null when `classifyRubric` yields no dimension for the component
+ */
+function componentToAssertionScore(comp) {
+    const dim = classifyRubric(comp);
+    if (!dim)
+        return null;
+    // Parse the raw score (0–100 from the grader) and normalize to [0, 1] using the "llm-rubric" rule
+    const rawScore = parseRubricScore(comp);
+    const normalized = normalizeScore(rawScore, "llm-rubric");
+    return {
+        assertionType: comp.assertion?.type ?? "llm-rubric",
+        dimension: dim,
+        latencyMs: 0,
+        pass: comp.pass,
+        reason: comp.reason ?? "",
+        score: normalized,
+        weight: 1.0,
+    };
+}
+/** Convert a kebab-case dimension key to camelCase (e.g., "task-completion" → "taskCompletion"); only hyphens followed by a–z are rewritten */
+function kebabToCamel(kebab) {
+    return kebab.replace(/-([a-z])/g, (_, c) => c.toUpperCase());
+}

package/dist/pipeline/compiler/task-graph-builder.d.ts ADDED Viewed

@@ -0,0 +1,54 @@
+/**
+ * TaskGraphBuilder — converts task definitions into a TaskGraph IR.
+ *
+ * The builder is the first stage of the compilation pipeline:
+ *   GeneralizedTaskDefinitions → TaskGraphBuilder → TaskGraph → PromptfooCompiler → YAML
+ *
+ * Responsibilities:
+ * - Accept tasks from any source (TS, YAML, Content Lake)
+ * - Apply area/tag/mode filtering
+ * - Resolve inter-task dependencies into edges
+ * - Validate the graph is a DAG (reject cycles)
+ * - Assign execution priority via topological sort
+ *
+ * This module exists alongside `generate-configs.ts` — it does NOT replace
+ * the existing codegen path. Phase 7 will swap callers over to the compiler.
+ *
+ * @see packages/core/src/types/task-graph.ts — TaskGraph types
+ * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
+ */
+import type { DependencyEdge, FilterOptions, GeneralizedTaskDefinition, TaskGraph, TaskNode } from "../../_vendor/ailf-core/index.d.ts";
+/** Options accepted by `buildTaskGraph` */
+export interface TaskGraphBuildOptions {
+    /** Task definitions from any source (TS, YAML, Content Lake) */
+    tasks: GeneralizedTaskDefinition[];
+    /** Optional filter applied to `tasks` before any nodes are created */
+    filter?: FilterOptions;
+    /** Compilation target backend: "custom" or "promptfoo" */
+    compilationTarget?: "custom" | "promptfoo";
+}
+/** Result returned by `buildTaskGraph` */
+export interface TaskGraphBuildResult {
+    /** The built graph (null if no tasks survived filtering) */
+    graph: TaskGraph | null;
+    /** Non-fatal warnings emitted during build (e.g., no tasks matched the filter, duplicate task ID) */
+    warnings: string[];
+    /** Tasks that were filtered out (presumably their task IDs — confirm against the filter implementation) */
+    filteredOut: string[];
+}
+/**
+ * Build a TaskGraph from task definitions.
+ *
+ * 1. Filters tasks by area, tags, task IDs, and status (returns `graph: null` plus a warning if none remain)
+ * 2. Creates TaskNodes with resolved variables (a duplicate task ID adds a warning; the later definition wins)
+ * 3. Discovers dependency edges from task metadata
+ * 4. Validates the graph is acyclic
+ * 5. Assigns topological priority
+ */
+export declare function buildTaskGraph(options: TaskGraphBuildOptions): TaskGraphBuildResult;
+/**
+ * Detect cycles in the task graph using Kahn's algorithm.
+ * NOTE(review): Kahn's algorithm leaves a set of unsorted nodes rather than an ordered path — confirm what the array holds.
+ * @returns null if acyclic, or the cycle path as a string array
+ */
+export declare function detectCycle(nodes: Map<string, TaskNode>, edges: DependencyEdge[]): string[] | null;

package/dist/pipeline/compiler/task-graph-builder.js ADDED Viewed

@@ -0,0 +1,291 @@
+/**
+ * TaskGraphBuilder — converts task definitions into a TaskGraph IR.
+ *
+ * The builder is the first stage of the compilation pipeline:
+ *   GeneralizedTaskDefinitions → TaskGraphBuilder → TaskGraph → PromptfooCompiler → YAML
+ *
+ * Responsibilities:
+ * - Accept tasks from any source (TS, YAML, Content Lake)
+ * - Apply area/tag/mode filtering
+ * - Resolve inter-task dependencies into edges
+ * - Validate the graph is a DAG (reject cycles)
+ * - Assign execution priority via topological sort
+ *
+ * This module exists alongside `generate-configs.ts` — it does NOT replace
+ * the existing codegen path. Phase 7 will swap callers over to the compiler.
+ *
+ * @see packages/core/src/types/task-graph.ts — TaskGraph types
+ * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
+ */
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+/**
+ * Build a TaskGraph from task definitions.
+ *
+ * 1. Filters tasks by area, tags, task IDs, and status
+ * 2. Creates TaskNodes with resolved variables
+ * 3. Discovers dependency edges from task metadata
+ * 4. Validates the graph is acyclic
+ * 5. Assigns topological priority
+ */
+export function buildTaskGraph(options) {
+    const warnings = [];
+    const filteredOut = [];
+    // Step 1: Filter tasks
+    const filtered = filterTasks(options.tasks, options.filter, filteredOut);
+    if (filtered.length === 0) {
+        return {
+            graph: null,
+            warnings: ["No tasks matched the filter criteria"],
+            filteredOut,
+        };
+    }
+    // Step 2: Create nodes
+    const nodes = new Map();
+    for (const task of filtered) {
+        const node = taskToNode(task);
+        if (nodes.has(node.taskId)) {
+            warnings.push(`Duplicate task ID "${node.taskId}" — later definition wins`);
+        }
+        nodes.set(node.taskId, node);
+    }
+    // Step 3: Discover edges from dependency metadata
+    const edges = discoverEdges(filtered, nodes, warnings);
+    // Step 4: Validate acyclicity
+    const cycleError = detectCycle(nodes, edges);
+    if (cycleError) {
+        throw new Error(`Task graph contains a cycle: ${cycleError.join(" → ")}. ` +
+            "Task graphs must be directed acyclic graphs (DAGs).");
+    }
+    // Step 5: Assign topological priority
+    assignPriority(nodes, edges);
+    // Step 6: Build fixture map (empty for now — Phase 2d fills this)
+    const fixtures = new Map();
+    const graph = {
+        compilationTarget: options.compilationTarget ?? "promptfoo",
+        edges,
+        fixtures,
+        nodes,
+    };
+    return { graph, warnings, filteredOut };
+}
+// ---------------------------------------------------------------------------
+// Filtering
+// ---------------------------------------------------------------------------
+function filterTasks(tasks, filter, filteredOut) {
+    return tasks.filter((task) => {
+        // Status filter — always applied (even without explicit filter options)
+        const status = task.status ?? "active";
+        const isTargetedById = filter?.taskIds && filter.taskIds.includes(task.id);
+        if (status === "archived") {
+            filteredOut.push(task.id);
+            return false;
+        }
+        if (status === "paused" && !isTargetedById) {
+            filteredOut.push(task.id);
+            return false;
+        }
+        if (status === "draft" && !isTargetedById && !filter?.includeDrafts) {
+            filteredOut.push(task.id);
+            return false;
+        }
+        // Remaining filters only apply when an explicit filter is provided
+        if (!filter)
+            return true;
+        // Area filter — GeneralizedTaskDefinition uses `area` (not `featureArea`)
+        const taskArea = task.area ?? "";
+        if (filter.areas &&
+            filter.areas.length > 0 &&
+            !filter.areas.map((a) => a.toLowerCase()).includes(taskArea.toLowerCase())) {
+            filteredOut.push(task.id);
+            return false;
+        }
+        // Task ID filter
+        if (filter.taskIds &&
+            filter.taskIds.length > 0 &&
+            !filter.taskIds.includes(task.id)) {
+            filteredOut.push(task.id);
+            return false;
+        }
+        // Tag filter
+        if (filter.tags &&
+            filter.tags.length > 0 &&
+            (!task.tags || !task.tags.some((t) => filter.tags.includes(t)))) {
+            filteredOut.push(task.id);
+            return false;
+        }
+        return true;
+    });
+}
+// ---------------------------------------------------------------------------
+// Node creation
+// ---------------------------------------------------------------------------
+function taskToNode(task) {
+    // GeneralizedTaskDefinition uses prompt.text/prompt.template instead of taskPrompt,
+    // and prompt.vars instead of extraVars
+    const promptText = task.prompt?.text ?? task.prompt?.template ?? "";
+    const promptVars = task.prompt?.vars ?? {};
+    const envelope = {
+        declarations: [],
+        provenance: {},
+        values: {
+            ...(promptText ? { task: promptText } : {}),
+            ...promptVars,
+        },
+    };
+    return {
+        dependsOn: [],
+        mode: task.mode,
+        priority: 0,
+        resolvedPrompt: promptText,
+        resolvedVariables: envelope,
+        taskId: task.id,
+    };
+}
+// ---------------------------------------------------------------------------
+// Edge discovery
+// ---------------------------------------------------------------------------
+/**
+ * Discover dependency edges from task metadata.
+ *
+ * Looks for explicit `dependsOn` arrays in prompt.vars (the generalized
+ * equivalent of the old extraVars convention).
+ * Future phases will add implicit deps from fixture sharing, data flow, etc.
+ */
+function discoverEdges(tasks, nodes, warnings) {
+    const edges = [];
+    for (const task of tasks) {
+        // Check for explicit dependencies in prompt.vars (was extraVars.dependsOn)
+        const deps = task.prompt?.vars?.dependsOn;
+        if (Array.isArray(deps)) {
+            for (const dep of deps) {
+                if (typeof dep !== "string")
+                    continue;
+                if (!nodes.has(dep)) {
+                    warnings.push(`Task "${task.id}" depends on "${dep}" which is not in the graph — ` +
+                        "dependency ignored (task may have been filtered out)");
+                    continue;
+                }
+                edges.push({ from: dep, to: task.id, type: "ordering" });
+                const node = nodes.get(task.id);
+                if (node && !node.dependsOn.includes(dep)) {
+                    node.dependsOn.push(dep);
+                }
+            }
+        }
+    }
+    return edges;
+}
+// ---------------------------------------------------------------------------
+// Cycle detection — Kahn's algorithm (topological sort)
+// ---------------------------------------------------------------------------
+/**
+ * Detect cycles in the task graph using Kahn's algorithm.
+ *
+ * @returns null if acyclic, or the cycle path as a string array
+ */
+export function detectCycle(nodes, edges) {
+    // Build in-degree map
+    const inDegree = new Map();
+    const adjacency = new Map();
+    for (const id of nodes.keys()) {
+        inDegree.set(id, 0);
+        adjacency.set(id, []);
+    }
+    for (const edge of edges) {
+        adjacency.get(edge.from).push(edge.to);
+        inDegree.set(edge.to, (inDegree.get(edge.to) ?? 0) + 1);
+    }
+    // Start with all zero-in-degree nodes
+    const queue = [];
+    for (const [id, deg] of inDegree) {
+        if (deg === 0)
+            queue.push(id);
+    }
+    let visited = 0;
+    while (queue.length > 0) {
+        const current = queue.shift();
+        visited++;
+        for (const neighbor of adjacency.get(current) ?? []) {
+            const newDeg = (inDegree.get(neighbor) ?? 1) - 1;
+            inDegree.set(neighbor, newDeg);
+            if (newDeg === 0)
+                queue.push(neighbor);
+        }
+    }
+    if (visited === nodes.size)
+        return null;
+    // Find cycle participants (nodes with remaining in-degree > 0)
+    const cycleNodes = [...inDegree.entries()]
+        .filter(([, deg]) => deg > 0)
+        .map(([id]) => id);
+    // Reconstruct a cycle path for the error message
+    return reconstructCyclePath(cycleNodes, adjacency);
+}
+/**
+ * Reconstruct a human-readable cycle path from cycle participants.
+ */
+function reconstructCyclePath(cycleNodes, adjacency) {
+    if (cycleNodes.length === 0)
+        return [];
+    const inCycle = new Set(cycleNodes);
+    const start = cycleNodes[0];
+    const path = [start];
+    const visited = new Set();
+    let current = start;
+    // Follow edges within the cycle to produce a readable path
+    while (true) {
+        visited.add(current);
+        const next = (adjacency.get(current) ?? []).find((n) => inCycle.has(n) && (!visited.has(n) || n === start));
+        if (!next)
+            break;
+        path.push(next);
+        if (next === start)
+            break; // Completed the cycle
+        current = next;
+    }
+    return path;
+}
+// ---------------------------------------------------------------------------
+// Topological priority assignment
+// ---------------------------------------------------------------------------
+/**
+ * Assign execution priority via topological order.
+ * Lower priority = earlier execution.
+ */
+function assignPriority(nodes, edges) {
+    const inDegree = new Map();
+    const adjacency = new Map();
+    for (const id of nodes.keys()) {
+        inDegree.set(id, 0);
+        adjacency.set(id, []);
+    }
+    for (const edge of edges) {
+        adjacency.get(edge.from).push(edge.to);
+        inDegree.set(edge.to, (inDegree.get(edge.to) ?? 0) + 1);
+    }
+    const queue = [];
+    for (const [id, deg] of inDegree) {
+        if (deg === 0)
+            queue.push(id);
+    }
+    let priority = 0;
+    while (queue.length > 0) {
+        // Process all nodes at the current level (same priority)
+        const levelSize = queue.length;
+        for (let i = 0; i < levelSize; i++) {
+            const current = queue.shift();
+            const node = nodes.get(current);
+            if (node)
+                node.priority = priority;
+            for (const neighbor of adjacency.get(current) ?? []) {
+                const newDeg = (inDegree.get(neighbor) ?? 1) - 1;
+                inDegree.set(neighbor, newDeg);
+                if (newDeg === 0)
+                    queue.push(neighbor);
+            }
+        }
+        priority++;
+    }
+}

package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts ADDED Viewed

@@ -0,0 +1,90 @@
+/**
+ * Cost tracking — model pricing, pre-run estimation, and post-run actuals.
+ *
+ * Uses a pricing table (YAML config or TS `definePricingTable()`) to compute
+ * USD cost from token usage. Supports budget controls with warn/stop thresholds.
+ *
+ * @see docs/design-docs/architecture-overhaul/observability-telemetry.md
+ */
+import type { TraceTokenUsage } from "../../../_vendor/ailf-core/index.d.ts";
+/**
+ * Per-model pricing (USD per 1M tokens).
+ *
+ * Every rate below is quoted for one million tokens, not per token.
+ */
+export interface ModelPricing {
+    /** Input tokens cost per 1M tokens */
+    input: number;
+    /** Output tokens cost per 1M tokens */
+    output: number;
+    /**
+     * Cached input tokens cost per 1M tokens (optional).
+     * NOTE(review): the rate used when this is omitted is decided by the
+     * implementation, which is not part of this declaration — confirm there.
+     */
+    cachedInput?: number;
+}
+/**
+ * Budget control thresholds (in USD).
+ *
+ * Both levels are optional; each one pairs a `warn` amount with a `stop`
+ * amount. `checkBudget` selects a level by its key name.
+ */
+export interface BudgetConfig {
+    /** Thresholds for the `perRun` level — presumably the cost of a whole pipeline run (confirm in implementation) */
+    perRun?: {
+        /** Warning threshold in USD */
+        warn: number;
+        /** Stop threshold in USD */
+        stop: number;
+    };
+    /** Thresholds for the `perTask` level — presumably the cost of a single task (confirm in implementation) */
+    perTask?: {
+        /** Warning threshold in USD */
+        warn: number;
+        /** Stop threshold in USD */
+        stop: number;
+    };
+}
+/** Cost estimate for a pipeline run (the return type of `estimateRunCost`) */
+export interface CostEstimate {
+    /** Estimated total cost in USD */
+    totalUSD: number;
+    /** Per-model breakdown */
+    perModel: {
+        /** Identifier of the model this entry belongs to */
+        modelId: string;
+        /** Estimated cost for this model in USD */
+        estimatedUSD: number;
+    }[];
+    /** Whether estimate exceeds budget warning threshold */
+    exceedsWarning: boolean;
+    /** Whether estimate exceeds budget stop threshold */
+    exceedsStop: boolean;
+}
+/** Actual cost computed from real token usage */
+export interface ActualCost {
+    /** Actual total cost in USD */
+    totalUSD: number;
+    /** Per-model actual cost */
+    perModel: {
+        /** Identifier of the model this entry belongs to */
+        modelId: string;
+        /** Actual cost for this model in USD */
+        actualUSD: number;
+        /** Token usage recorded for this model (presumably the usage the cost was computed from) */
+        tokens: TraceTokenUsage;
+    }[];
+}
+/** Budget check result (the return type of `checkBudget`) */
+export interface BudgetCheckResult {
+    /** Whether to proceed */
+    proceed: boolean;
+    /** Warning message (if any) */
+    warning?: string;
+    /** Current spend in USD */
+    currentUSD: number;
+    /** Budget limit that was checked, in USD (absent when no limit is reported) */
+    limitUSD?: number;
+}
+/**
+ * Compute actual cost from token usage and model pricing.
+ *
+ * Pricing is quoted per 1M tokens, so the result is a USD amount for the
+ * given usage rather than a rate.
+ *
+ * @param usage - Token counts from provider response
+ * @param pricing - Per-model pricing (USD per 1M tokens)
+ * @returns Cost in USD
+ */
+export declare function computeCost(usage: TraceTokenUsage, pricing: ModelPricing): number;
+/**
+ * Look up pricing for a model ID.
+ *
+ * Tries exact match first, then falls back to prefix matching
+ * (e.g., "openai:chat:gpt-4o-2024-11-20" matches "openai:chat:gpt-4o").
+ *
+ * @param modelId - Model identifier to price
+ * @param customPricing - Optional pricing entries keyed by model ID
+ *   (NOTE(review): how these combine with any built-in table is not visible
+ *   in this declaration — confirm in the implementation)
+ * @returns The matching pricing, or `undefined` (presumably when nothing matches)
+ */
+export declare function lookupPricing(modelId: string, customPricing?: Record<string, ModelPricing>): ModelPricing | undefined;
+/**
+ * Estimate cost for a pipeline run before execution.
+ *
+ * Uses task count, estimated tokens per task complexity, and model pricing.
+ *
+ * @param taskCount - Number of tasks in the run
+ * @param modelIds - Models the run will use; the estimate carries a per-model breakdown
+ * @param budget - Optional thresholds behind the `exceedsWarning` / `exceedsStop` flags
+ *   (NOTE(review): which budget level is compared is not visible here — confirm)
+ * @param customPricing - Optional pricing entries keyed by model ID
+ * @returns The estimated total, the per-model breakdown, and the budget flags
+ */
+export declare function estimateRunCost(taskCount: number, modelIds: string[], budget?: BudgetConfig, customPricing?: Record<string, ModelPricing>): CostEstimate;
+/**
+ * Check if current spend exceeds budget thresholds.
+ *
+ * @param currentUSD - Spend so far, in USD
+ * @param budget - Threshold configuration
+ * @param level - Key of the `budget` entry to check against ("perRun" or "perTask")
+ * @returns Whether to proceed, plus an optional warning, the current spend,
+ *   and the limit that was checked
+ */
+export declare function checkBudget(currentUSD: number, budget: BudgetConfig, level: "perRun" | "perTask"): BudgetCheckResult;