@sanity/ailf 0.4.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/features.ts +23 -0
- package/config/models.ts +83 -0
- package/config/prompts.ts +16 -0
- package/config/rubrics.ts +225 -0
- package/config/schedules.ts +47 -0
- package/config/sinks.ts +37 -0
- package/config/sources.ts +21 -0
- package/config/thresholds.ts +61 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
- package/dist/_vendor/ailf-core/config-helpers.js +150 -0
- package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
- package/dist/_vendor/ailf-core/env-helper.js +45 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +10 -10
- package/dist/_vendor/ailf-core/examples/index.js +10 -10
- package/dist/_vendor/ailf-core/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/index.js +5 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
- package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
- package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +32 -31
- package/dist/_vendor/ailf-core/schemas/pipeline.js +52 -12
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
- package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/services/index.js +2 -1
- package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
- package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
- package/dist/_vendor/ailf-core/services/scoring.js +25 -15
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
- package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
- package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
- package/dist/_vendor/ailf-core/types/index.js +8 -1
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
- package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
- package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
- package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
- package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
- package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
- package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
- package/dist/_vendor/ailf-core/types/trace.js +18 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
- package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
- package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
- package/dist/_vendor/ailf-shared/index.d.ts +0 -1
- package/dist/_vendor/ailf-shared/index.js +0 -1
- package/dist/adapters/api-client/build-request.js +14 -13
- package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
- package/dist/adapters/config-sources/file-config-adapter.js +38 -12
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
- package/dist/adapters/config-sources/ts-config-loader.js +133 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
- package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
- package/dist/adapters/task-sources/index.d.ts +1 -0
- package/dist/adapters/task-sources/index.js +1 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
- package/dist/adapters/task-sources/repo-task-source.js +69 -16
- package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
- package/dist/adapters/task-sources/task-file-loader.js +83 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
- package/dist/adapters/task-sources/yaml-task-source.js +19 -16
- package/dist/cli.js +0 -2
- package/dist/commands/baseline.js +4 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/coverage-audit.js +7 -1
- package/dist/commands/explain-handler.js +25 -23
- package/dist/commands/fetch-docs.js +3 -2
- package/dist/commands/generate-configs.js +1 -1
- package/dist/commands/interactive.js +11 -7
- package/dist/commands/pipeline-action.d.ts +2 -0
- package/dist/commands/pipeline-action.js +16 -6
- package/dist/commands/pipeline.d.ts +1 -0
- package/dist/commands/pipeline.js +4 -2
- package/dist/commands/pr-comment.js +1 -1
- package/dist/commands/publish.js +2 -2
- package/dist/commands/readiness-report.js +13 -6
- package/dist/composition-root.d.ts +1 -1
- package/dist/composition-root.js +67 -4
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +24 -6
- package/dist/orchestration/steps/calculate-scores-step.js +24 -11
- package/dist/orchestration/steps/fetch-docs-step.js +6 -4
- package/dist/orchestration/steps/gap-analysis-step.js +8 -7
- package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
- package/dist/orchestration/steps/generate-configs-step.js +245 -51
- package/dist/orchestration/steps/grader-consistency-step.js +7 -4
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/readiness-step.js +5 -6
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
- package/dist/orchestration/steps/run-eval-step.js +8 -7
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/cache.js +36 -8
- package/dist/pipeline/calculate-scores.d.ts +5 -7
- package/dist/pipeline/calculate-scores.js +74 -153
- package/dist/pipeline/checks.js +2 -2
- package/dist/pipeline/compare.js +8 -8
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
- package/dist/pipeline/compiler/assertion-mapper.js +175 -0
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
- package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
- package/dist/pipeline/compiler/config-loader.d.ts +56 -0
- package/dist/pipeline/compiler/config-loader.js +111 -0
- package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
- package/dist/pipeline/compiler/fixture-resolver.js +113 -0
- package/dist/pipeline/compiler/hash.d.ts +11 -0
- package/dist/pipeline/compiler/hash.js +18 -0
- package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
- package/dist/pipeline/compiler/ignore-fields.js +113 -0
- package/dist/pipeline/compiler/index.d.ts +29 -0
- package/dist/pipeline/compiler/index.js +45 -0
- package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
- package/dist/pipeline/compiler/literacy-bridge.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
- package/dist/pipeline/compiler/presets/index.d.ts +9 -0
- package/dist/pipeline/compiler/presets/index.js +8 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
- package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
- package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
- package/dist/pipeline/compiler/provider-assembler.js +137 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
- package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
- package/dist/pipeline/compiler/sandbox/index.js +11 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
- package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
- package/dist/pipeline/compiler/scoring-bridge.js +114 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
- package/dist/pipeline/compiler/task-graph-builder.js +291 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
- package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
- package/dist/pipeline/compiler/telemetry/index.js +19 -0
- package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
- package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
- package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
- package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
- package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
- package/dist/pipeline/compiler/variable-resolver.js +115 -0
- package/dist/pipeline/coverage-audit.d.ts +15 -5
- package/dist/pipeline/coverage-audit.js +41 -22
- package/dist/pipeline/eval-constants.d.ts +16 -6
- package/dist/pipeline/eval-constants.js +25 -4
- package/dist/pipeline/eval-fingerprint.d.ts +2 -2
- package/dist/pipeline/eval-fingerprint.js +8 -9
- package/dist/pipeline/expand-tasks.d.ts +23 -14
- package/dist/pipeline/expand-tasks.js +37 -31
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +2 -2
- package/dist/pipeline/generate-configs.d.ts +22 -4
- package/dist/pipeline/generate-configs.js +53 -24
- package/dist/pipeline/grader-api.d.ts +3 -3
- package/dist/pipeline/grader-api.js +5 -12
- package/dist/pipeline/grader-compare-runner.js +20 -27
- package/dist/pipeline/grader-comparison.d.ts +4 -8
- package/dist/pipeline/grader-comparison.js +11 -17
- package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
- package/dist/pipeline/grader-consistency-runner.js +18 -21
- package/dist/pipeline/grader-consistency.d.ts +6 -10
- package/dist/pipeline/grader-consistency.js +13 -32
- package/dist/pipeline/grader-sensitivity-runner.js +7 -5
- package/dist/pipeline/grader-sensitivity.d.ts +2 -6
- package/dist/pipeline/grader-sensitivity.js +10 -10
- package/dist/pipeline/grader-validate-runner.js +7 -5
- package/dist/pipeline/grader-validation.d.ts +2 -6
- package/dist/pipeline/grader-validation.js +14 -22
- package/dist/pipeline/map-request-to-config.js +6 -1
- package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
- package/dist/pipeline/mirror-repo-tasks.js +16 -15
- package/dist/pipeline/normalize-mode.d.ts +49 -0
- package/dist/pipeline/normalize-mode.js +64 -0
- package/dist/pipeline/plan.d.ts +5 -2
- package/dist/pipeline/plan.js +134 -78
- package/dist/pipeline/pr-comment.js +2 -0
- package/dist/pipeline/profile-resolution.d.ts +47 -0
- package/dist/pipeline/profile-resolution.js +91 -0
- package/dist/pipeline/provenance.d.ts +2 -2
- package/dist/pipeline/provenance.js +12 -17
- package/dist/pipeline/release-report.js +4 -4
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/rubric-loader.d.ts +20 -0
- package/dist/pipeline/rubric-loader.js +37 -0
- package/dist/pipeline/validate.d.ts +4 -4
- package/dist/pipeline/validate.js +64 -53
- package/dist/schedules/loader.js +18 -8
- package/dist/scripts/migrate-task-mode.d.ts +24 -0
- package/dist/scripts/migrate-task-mode.js +85 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +15 -15
- package/dist/sinks/loader.js +5 -7
- package/dist/sources.d.ts +7 -7
- package/dist/sources.js +22 -24
- package/dist/webhook/dispatch.js +2 -1
- package/package.json +6 -3
- package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
- package/tasks/literacy/frameworks.task.ts +128 -0
- package/tasks/literacy/functions.task.ts +69 -0
- package/tasks/literacy/groq.task.ts +258 -0
- package/tasks/literacy/nextjs-live.task.ts +75 -0
- package/tasks/literacy/studio-setup.task.ts +131 -0
- package/tasks/literacy/visual-editing.task.ts +146 -0
- package/config/features.yaml +0 -116
- package/config/models.yaml +0 -116
- package/config/prompts.yaml +0 -75
- package/config/rubrics.yaml +0 -62
- package/config/schedules.yaml +0 -43
- package/config/sinks.yaml +0 -54
- package/config/sources.yaml +0 -51
- package/config/thresholds.yaml +0 -49
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
|
@@ -2,17 +2,15 @@
|
|
|
2
2
|
* coverage-audit.ts
|
|
3
3
|
*
|
|
4
4
|
* Pure computation functions for cross-referencing the product feature registry
|
|
5
|
-
* (config/features
|
|
5
|
+
* (config/features) against actual task files (tasks/*.yaml)
|
|
6
6
|
* to produce a documentation coverage audit.
|
|
7
7
|
*
|
|
8
8
|
* Phase 3c of the Scenario Matrix implementation.
|
|
9
9
|
*
|
|
10
10
|
* @see docs/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
|
|
11
11
|
*/
|
|
12
|
-
import { existsSync, readFileSync } from "fs";
|
|
13
|
-
import { join } from "path";
|
|
14
|
-
import { load } from "js-yaml";
|
|
15
12
|
import { ConsoleLogger } from "../adapters/loggers/index.js";
|
|
13
|
+
import { tryLoadConfigFile } from "./compiler/config-loader.js";
|
|
16
14
|
import { FeatureRegistrySchema } from "./schemas.js";
|
|
17
15
|
import { resolveMappings } from "./resolve-mappings.js";
|
|
18
16
|
// ---------------------------------------------------------------------------
|
|
@@ -112,31 +110,52 @@ export function formatCoverageMarkdown(report) {
|
|
|
112
110
|
// Formatting
|
|
113
111
|
// ---------------------------------------------------------------------------
|
|
114
112
|
/**
|
|
115
|
-
* Load and validate the feature registry
|
|
113
|
+
* Load and validate the feature registry.
|
|
114
|
+
*
|
|
115
|
+
* Resolution order:
|
|
116
|
+
* 1. config/features file (user overrides) — if non-empty, wins
|
|
117
|
+
* 2. Registry features (preset-provided) — fallback when config is empty
|
|
118
|
+
* 3. null — no features available
|
|
116
119
|
*/
|
|
117
|
-
export function loadFeatureRegistry(rootDir,
|
|
118
|
-
const log = logger ?? new ConsoleLogger();
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
log.error(` ${issue.path.join(".")}: ${issue.message}`);
|
|
120
|
+
export function loadFeatureRegistry(rootDir, options) {
|
|
121
|
+
const log = options?.logger ?? new ConsoleLogger();
|
|
122
|
+
// Priority 1: config file (user overrides)
|
|
123
|
+
const loaded = tryLoadConfigFile("features", rootDir);
|
|
124
|
+
if (loaded) {
|
|
125
|
+
// Check for intentionally empty config (stub for preset override).
|
|
126
|
+
// The Zod schema requires .min(1), so an empty features array would
|
|
127
|
+
// fail validation — but that's the expected state when the preset
|
|
128
|
+
// provides features and the config file is just an override point.
|
|
129
|
+
const raw = loaded.data;
|
|
130
|
+
if (Array.isArray(raw?.features) && raw.features.length === 0) {
|
|
131
|
+
// Empty config — fall through to registry
|
|
130
132
|
}
|
|
131
|
-
|
|
133
|
+
else {
|
|
134
|
+
const result = FeatureRegistrySchema.safeParse(loaded.data);
|
|
135
|
+
if (!result.success) {
|
|
136
|
+
log.error("❌ config/features validation failed:");
|
|
137
|
+
for (const issue of result.error.issues) {
|
|
138
|
+
log.error(` ${issue.path.join(".")}: ${issue.message}`);
|
|
139
|
+
}
|
|
140
|
+
return null;
|
|
141
|
+
}
|
|
142
|
+
return result.data.features;
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
// Priority 2: registry (preset-provided features)
|
|
146
|
+
const registryFeatures = options?.registry?.getFeatureDefs();
|
|
147
|
+
if (registryFeatures && registryFeatures.features.length > 0) {
|
|
148
|
+
return registryFeatures.features;
|
|
132
149
|
}
|
|
133
|
-
return
|
|
150
|
+
return null;
|
|
134
151
|
}
|
|
135
152
|
/**
|
|
136
153
|
* Run the coverage audit and produce a structured report.
|
|
137
154
|
*/
|
|
138
|
-
export function runCoverageAudit(rootDir) {
|
|
139
|
-
const features = loadFeatureRegistry(rootDir
|
|
155
|
+
export function runCoverageAudit(rootDir, options) {
|
|
156
|
+
const features = loadFeatureRegistry(rootDir, {
|
|
157
|
+
registry: options?.registry,
|
|
158
|
+
});
|
|
140
159
|
if (!features)
|
|
141
160
|
return null;
|
|
142
161
|
const taskCounts = countTasksByArea(rootDir);
|
|
@@ -5,11 +5,21 @@
|
|
|
5
5
|
* files can be deleted while tests and other modules retain access
|
|
6
6
|
* to these shared definitions.
|
|
7
7
|
*/
|
|
8
|
-
import type {
|
|
9
|
-
/**
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
8
|
+
import type { DebugOptions, FilterOptions, StepResult } from "../_vendor/ailf-core/index.d.ts";
|
|
9
|
+
/**
|
|
10
|
+
* Get the Promptfoo config file path for a given mode.
|
|
11
|
+
*
|
|
12
|
+
* Literacy variants use legacy naming for backward compatibility.
|
|
13
|
+
* All other modes use the pattern: `promptfooconfig.<mode>.yaml`
|
|
14
|
+
*/
|
|
15
|
+
export declare function configFileForMode(mode: string): string;
|
|
16
|
+
/**
|
|
17
|
+
* Get the results file path for a given mode.
|
|
18
|
+
*
|
|
19
|
+
* Literacy variants use legacy naming for backward compatibility.
|
|
20
|
+
* All other modes use: `results/latest/eval-results-<mode>.json`
|
|
21
|
+
*/
|
|
22
|
+
export declare function resultsFileForMode(mode: string): string;
|
|
13
23
|
/** Extended step result that carries cache metadata for downstream steps */
|
|
14
24
|
export interface EvalStepResult {
|
|
15
25
|
/** The computed eval fingerprint (for publishing in provenance) */
|
|
@@ -25,7 +35,7 @@ export interface RemoteCacheOptions {
|
|
|
25
35
|
debug?: boolean;
|
|
26
36
|
/** Filter options used for fingerprint computation */
|
|
27
37
|
filter?: FilterOptions;
|
|
28
|
-
/** Grader model identifier from models
|
|
38
|
+
/** Grader model identifier from models config */
|
|
29
39
|
graderModel: string;
|
|
30
40
|
/** Disable remote cache lookup (--no-remote-cache) */
|
|
31
41
|
noRemoteCache?: boolean;
|
|
@@ -6,18 +6,39 @@
|
|
|
6
6
|
* to these shared definitions.
|
|
7
7
|
*/
|
|
8
8
|
import { existsSync, readFileSync } from "fs";
|
|
9
|
-
|
|
10
|
-
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
// Config / results file naming — unified across all modes
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
/** Literacy variant names that map to legacy config file naming */
|
|
13
|
+
const LITERACY_CONFIG_FILES = {
|
|
11
14
|
agentic: "promptfooconfig.agentic.yaml",
|
|
12
15
|
baseline: "promptfooconfig.yaml",
|
|
13
16
|
observed: "promptfooconfig.observed.yaml",
|
|
14
17
|
};
|
|
15
|
-
/**
|
|
16
|
-
|
|
18
|
+
/** Literacy variant names that map to legacy results file naming */
|
|
19
|
+
const LITERACY_RESULTS_FILES = {
|
|
17
20
|
agentic: "results/latest/eval-results-agentic.json",
|
|
18
21
|
baseline: "results/latest/eval-results.json",
|
|
19
22
|
observed: "results/latest/eval-results-observed.json",
|
|
20
23
|
};
|
|
24
|
+
/**
|
|
25
|
+
* Get the Promptfoo config file path for a given mode.
|
|
26
|
+
*
|
|
27
|
+
* Literacy variants use legacy naming for backward compatibility.
|
|
28
|
+
* All other modes use the pattern: `promptfooconfig.<mode>.yaml`
|
|
29
|
+
*/
|
|
30
|
+
export function configFileForMode(mode) {
|
|
31
|
+
return LITERACY_CONFIG_FILES[mode] ?? `promptfooconfig.${mode}.yaml`;
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Get the results file path for a given mode.
|
|
35
|
+
*
|
|
36
|
+
* Literacy variants use legacy naming for backward compatibility.
|
|
37
|
+
* All other modes use: `results/latest/eval-results-<mode>.json`
|
|
38
|
+
*/
|
|
39
|
+
export function resultsFileForMode(mode) {
|
|
40
|
+
return (LITERACY_RESULTS_FILES[mode] ?? `results/latest/eval-results-${mode}.json`);
|
|
41
|
+
}
|
|
21
42
|
/**
|
|
22
43
|
* Build promptfoo filter flags from debug options.
|
|
23
44
|
*/
|
|
@@ -43,9 +43,9 @@ export interface FingerprintInput {
|
|
|
43
43
|
* This is similar to `getStepInputPaths()` in `cache.ts` but is more
|
|
44
44
|
* comprehensive and explicitly designed for cross-environment cache keys:
|
|
45
45
|
*
|
|
46
|
-
* - Includes `config/prompts
|
|
46
|
+
* - Includes `config/prompts` and `config/rubrics` directly
|
|
47
47
|
* (the local cache only includes them indirectly via generated configs)
|
|
48
|
-
* - Includes `config/models
|
|
48
|
+
* - Includes `config/models` (model configuration)
|
|
49
49
|
* - Includes task definitions and reference solutions
|
|
50
50
|
* - Includes the actual documentation content (contexts/canonical/*.md)
|
|
51
51
|
* - Respects filter flags to only include relevant files
|
|
@@ -43,9 +43,9 @@ const FINGERPRINT_VERSION = "eval-fingerprint-v1";
|
|
|
43
43
|
* This is similar to `getStepInputPaths()` in `cache.ts` but is more
|
|
44
44
|
* comprehensive and explicitly designed for cross-environment cache keys:
|
|
45
45
|
*
|
|
46
|
-
* - Includes `config/prompts
|
|
46
|
+
* - Includes `config/prompts` and `config/rubrics` directly
|
|
47
47
|
* (the local cache only includes them indirectly via generated configs)
|
|
48
|
-
* - Includes `config/models
|
|
48
|
+
* - Includes `config/models` (model configuration)
|
|
49
49
|
* - Includes task definitions and reference solutions
|
|
50
50
|
* - Includes the actual documentation content (contexts/canonical/*.md)
|
|
51
51
|
* - Respects filter flags to only include relevant files
|
|
@@ -56,11 +56,10 @@ export function collectFingerprintInputPaths(rootDir, filter) {
|
|
|
56
56
|
// -----------------------------------------------------------------------
|
|
57
57
|
// Config files — always included
|
|
58
58
|
// -----------------------------------------------------------------------
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
];
|
|
59
|
+
// Check all supported extensions in priority order
|
|
60
|
+
const configNames = ["models", "prompts", "rubrics"];
|
|
61
|
+
const configExts = [".ts", ".js", ".yaml", ".yml", ".json"];
|
|
62
|
+
const configFiles = configNames.flatMap((name) => configExts.map((ext) => `config/${name}${ext}`));
|
|
64
63
|
for (const f of configFiles) {
|
|
65
64
|
const p = r(f);
|
|
66
65
|
if (existsSync(p))
|
|
@@ -72,12 +71,12 @@ export function collectFingerprintInputPaths(rootDir, filter) {
|
|
|
72
71
|
const tasksDir = r("tasks");
|
|
73
72
|
if (existsSync(tasksDir)) {
|
|
74
73
|
const taskFiles = readdirSync(tasksDir)
|
|
75
|
-
.filter((f) =>
|
|
74
|
+
.filter((f) => /\.(yaml|yml|task\.ts|task\.js)$/.test(f))
|
|
76
75
|
.filter((f) => !f.startsWith(".")); // exclude .expanded.yaml
|
|
77
76
|
for (const f of taskFiles) {
|
|
78
77
|
// If area filter is set, only include matching task files
|
|
79
78
|
if (filter?.areas && filter.areas.length > 0) {
|
|
80
|
-
const stem = f.replace(/\.
|
|
79
|
+
const stem = f.replace(/\.(yaml|yml|task\.ts|task\.js)$/, "");
|
|
81
80
|
if (!filter.areas.includes(stem))
|
|
82
81
|
continue;
|
|
83
82
|
}
|
|
@@ -1,6 +1,15 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* pipeline/expand-tasks.ts
|
|
3
3
|
*
|
|
4
|
+
* @deprecated This is part of the LEGACY compilation path. New code should
|
|
5
|
+
* use the literacy handler in the compiler pipeline instead:
|
|
6
|
+
*
|
|
7
|
+
* import { compileLiteracyTask } from "./compiler/mode-handlers/literacy-handler.js"
|
|
8
|
+
*
|
|
9
|
+
* @see packages/eval/src/pipeline/compiler/mode-handlers/literacy-handler.ts
|
|
10
|
+
*
|
|
11
|
+
* ---
|
|
12
|
+
*
|
|
4
13
|
* Reads task YAML files in the single-definition format and expands each
|
|
5
14
|
* task into gold + baseline Promptfoo test entries. This eliminates the
|
|
6
15
|
* manual duplication where every task had to be written twice.
|
|
@@ -34,15 +43,15 @@
|
|
|
34
43
|
* value: ["client.fetch", "createClient"]
|
|
35
44
|
* baseline:
|
|
36
45
|
* enabled: true
|
|
37
|
-
* rubric:
|
|
46
|
+
* rubric: full
|
|
38
47
|
*
|
|
39
48
|
* Expands to:
|
|
40
49
|
* 1. Gold entry — uses vars.docs as-is, resolves templates, appends doc-coverage
|
|
41
|
-
* 2. Baseline entry — sets docs: "",
|
|
50
|
+
* 2. Baseline entry — sets docs: "", uses full rubric (same assertions as gold)
|
|
42
51
|
*/
|
|
43
|
-
import type {
|
|
44
|
-
import type { Logger } from "../_vendor/ailf-core/index.d.ts";
|
|
52
|
+
import type { LiteracyTaskDefinition, Logger } from "../_vendor/ailf-core/index.d.ts";
|
|
45
53
|
import { type RubricConfig } from "./schemas.js";
|
|
54
|
+
import { type LiteracyEvalSubMode } from "./normalize-mode.js";
|
|
46
55
|
import type { FilterOptions } from "./types.js";
|
|
47
56
|
/** Any assertion entry (templated or value-based). */
|
|
48
57
|
export type AssertEntry = TemplatedAssert | ValueAssert;
|
|
@@ -72,14 +81,14 @@ export interface LegacyTaskEntry {
|
|
|
72
81
|
}
|
|
73
82
|
/** A single task definition in the new format (input). */
|
|
74
83
|
export interface SingleTaskDefinition {
|
|
75
|
-
/** Grading assertions (applied to gold
|
|
84
|
+
/** Grading assertions (applied to both gold and baseline by default). */
|
|
76
85
|
assert: AssertEntry[];
|
|
77
86
|
/** Baseline generation options. */
|
|
78
87
|
baseline?: {
|
|
79
88
|
/** Whether to generate a baseline variant. Default: true. */
|
|
80
89
|
enabled?: boolean;
|
|
81
90
|
/** Rubric mode: 'full' copies all asserts, 'abbreviated' generates a
|
|
82
|
-
* summary rubric, 'none' omits rubric asserts. Default: '
|
|
91
|
+
* summary rubric, 'none' omits rubric asserts. Default: 'full'. */
|
|
83
92
|
rubric?: "abbreviated" | "full" | "none";
|
|
84
93
|
};
|
|
85
94
|
/** Human-readable description of what this task tests. */
|
|
@@ -155,19 +164,19 @@ export declare function clearRubricCache(): void;
|
|
|
155
164
|
* a single prompt that doesn't use `{{docs}}`; baseline entries would be
|
|
156
165
|
* pure waste — identical prompts, wasted API calls).
|
|
157
166
|
*/
|
|
158
|
-
export declare function expandTask(task: SingleTaskDefinition, rubricConfig: RubricConfig, mode?:
|
|
167
|
+
export declare function expandTask(task: SingleTaskDefinition, rubricConfig: RubricConfig, mode?: LiteracyEvalSubMode): ExpandedTestEntry[];
|
|
159
168
|
/**
|
|
160
|
-
* Expand an array of
|
|
161
|
-
* Promptfoo-compatible test entries. This is the TaskSource-aware
|
|
162
|
-
* of loadAndExpandTasks() — it skips YAML file I/O and works
|
|
163
|
-
* the
|
|
169
|
+
* Expand an array of LiteracyTaskDefinition[] (from any TaskSource adapter)
|
|
170
|
+
* into Promptfoo-compatible test entries. This is the TaskSource-aware
|
|
171
|
+
* counterpart of loadAndExpandTasks() — it skips YAML file I/O and works
|
|
172
|
+
* directly with the domain type.
|
|
164
173
|
*
|
|
165
|
-
* @param tasks -
|
|
174
|
+
* @param tasks - Literacy task definitions from any TaskSource adapter
|
|
166
175
|
* @param rootDir - Eval package root (needed to load rubric templates)
|
|
167
176
|
* @param mode - Expansion mode: 'baseline' (gold + baseline) or 'agentic' (gold only)
|
|
168
177
|
* @returns Expanded test entries and statistics
|
|
169
178
|
*/
|
|
170
|
-
export declare function expandTaskDefinitions(tasks:
|
|
179
|
+
export declare function expandTaskDefinitions(tasks: LiteracyTaskDefinition[], rootDir: string, mode?: LiteracyEvalSubMode): {
|
|
171
180
|
entries: ExpandedTestEntry[];
|
|
172
181
|
stats: {
|
|
173
182
|
totalTasks: number;
|
|
@@ -199,7 +208,7 @@ export declare function isTemplatedAssert(entry: AssertEntry): entry is Template
|
|
|
199
208
|
*
|
|
200
209
|
* Returns the expanded entries grouped by source file.
|
|
201
210
|
*/
|
|
202
|
-
export declare function loadAndExpandTasks(rootDir: string, filter?: FilterOptions, mode?:
|
|
211
|
+
export declare function loadAndExpandTasks(rootDir: string, filter?: FilterOptions, mode?: LiteracyEvalSubMode, logger?: Logger): {
|
|
203
212
|
/** All expanded test entries, in order. */
|
|
204
213
|
entries: ExpandedTestEntry[];
|
|
205
214
|
/** Statistics about what was processed. */
|
|
@@ -1,6 +1,15 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* pipeline/expand-tasks.ts
|
|
3
3
|
*
|
|
4
|
+
* @deprecated This is part of the LEGACY compilation path. New code should
|
|
5
|
+
* use the literacy handler in the compiler pipeline instead:
|
|
6
|
+
*
|
|
7
|
+
* import { compileLiteracyTask } from "./compiler/mode-handlers/literacy-handler.js"
|
|
8
|
+
*
|
|
9
|
+
* @see packages/eval/src/pipeline/compiler/mode-handlers/literacy-handler.ts
|
|
10
|
+
*
|
|
11
|
+
* ---
|
|
12
|
+
*
|
|
4
13
|
* Reads task YAML files in the single-definition format and expands each
|
|
5
14
|
* task into gold + baseline Promptfoo test entries. This eliminates the
|
|
6
15
|
* manual duplication where every task had to be written twice.
|
|
@@ -34,17 +43,19 @@
|
|
|
34
43
|
* value: ["client.fetch", "createClient"]
|
|
35
44
|
* baseline:
|
|
36
45
|
* enabled: true
|
|
37
|
-
* rubric:
|
|
46
|
+
* rubric: full
|
|
38
47
|
*
|
|
39
48
|
* Expands to:
|
|
40
49
|
* 1. Gold entry — uses vars.docs as-is, resolves templates, appends doc-coverage
|
|
41
|
-
* 2. Baseline entry — sets docs: "",
|
|
50
|
+
* 2. Baseline entry — sets docs: "", uses full rubric (same assertions as gold)
|
|
42
51
|
*/
|
|
43
52
|
import { existsSync, readFileSync, readdirSync } from "fs";
|
|
44
53
|
import { resolve } from "path";
|
|
45
54
|
import { load } from "js-yaml";
|
|
46
55
|
import { ConsoleLogger } from "../adapters/loggers/index.js";
|
|
56
|
+
import { loadConfigFile } from "./compiler/config-loader.js";
|
|
47
57
|
import { RubricConfigSchema } from "./schemas.js";
|
|
58
|
+
import { LiteracyVariant } from "./normalize-mode.js";
|
|
48
59
|
// ---------------------------------------------------------------------------
|
|
49
60
|
// Rubric template loading and assembly
|
|
50
61
|
// ---------------------------------------------------------------------------
|
|
@@ -142,7 +153,7 @@ export function clearRubricCache() {
|
|
|
142
153
|
* a single prompt that doesn't use `{{docs}}`; baseline entries would be
|
|
143
154
|
* pure waste — identical prompts, wasted API calls).
|
|
144
155
|
*/
|
|
145
|
-
export function expandTask(task, rubricConfig, mode =
|
|
156
|
+
export function expandTask(task, rubricConfig, mode = LiteracyVariant.STANDARD) {
|
|
146
157
|
const entries = [];
|
|
147
158
|
// Resolve all templated assertions
|
|
148
159
|
const resolvedAsserts = task.assert.map((a) => resolveAssert(a, rubricConfig));
|
|
@@ -168,20 +179,20 @@ export function expandTask(task, rubricConfig, mode = "baseline") {
|
|
|
168
179
|
entries.push({
|
|
169
180
|
assert: [...resolvedAsserts],
|
|
170
181
|
description: `${task.description} (gold)`,
|
|
171
|
-
...(mode ===
|
|
182
|
+
...(mode === LiteracyVariant.STANDARD ? { prompts: ["with-docs"] } : {}),
|
|
172
183
|
vars: { ...task.vars, __featureArea: task.featureArea ?? "" },
|
|
173
184
|
});
|
|
174
185
|
// Baseline entry — floor measurement (no docs, parametric knowledge only).
|
|
175
186
|
// Skipped entirely in agentic mode: the agentic prompt doesn't reference
|
|
176
187
|
// {{docs}}, so gold and baseline would produce identical prompts — pure
|
|
177
188
|
// waste of API calls and cost.
|
|
178
|
-
if (mode ===
|
|
189
|
+
if (mode === LiteracyVariant.AGENTIC) {
|
|
179
190
|
return entries;
|
|
180
191
|
}
|
|
181
192
|
// Emitted unless explicitly disabled (baseline.enabled === false); restricted to the 'without-docs' prompt.
|
|
182
193
|
const baselineEnabled = task.baseline?.enabled !== false;
|
|
183
194
|
if (baselineEnabled) {
|
|
184
|
-
const rubricMode = task.baseline?.rubric ?? "
|
|
195
|
+
const rubricMode = task.baseline?.rubric ?? "full";
|
|
185
196
|
const baselineAsserts = buildBaselineAsserts(resolvedAsserts, rubricMode);
|
|
186
197
|
entries.push({
|
|
187
198
|
description: `${task.description} (baseline)`,
|
|
@@ -197,8 +208,8 @@ export function expandTask(task, rubricConfig, mode = "baseline") {
|
|
|
197
208
|
return entries;
|
|
198
209
|
}
|
|
199
210
|
/**
|
|
200
|
-
* Convert a
|
|
201
|
-
*
|
|
211
|
+
* Convert a LiteracyTaskDefinition to the local SingleTaskDefinition
|
|
212
|
+
* format used by expandTask().
|
|
202
213
|
*
|
|
203
214
|
* When a task has no canonical docs, the `docs` var is set to empty string
|
|
204
215
|
* instead of a file path. This prevents Promptfoo from trying to read a
|
|
@@ -206,38 +217,38 @@ export function expandTask(task, rubricConfig, mode = "baseline") {
|
|
|
206
217
|
* without canonical docs). The gold entry still runs — it just tests model
|
|
207
218
|
* knowledge alone, same as the baseline variant.
|
|
208
219
|
*/
|
|
209
|
-
function
|
|
210
|
-
const hasDocs = task.
|
|
220
|
+
function literacyTaskToSingle(task) {
|
|
221
|
+
const hasDocs = (task.context?.docs?.length ?? 0) > 0;
|
|
211
222
|
return {
|
|
212
|
-
assert: task.assertions.map((a) => ({ ...a })),
|
|
223
|
+
assert: (task.assertions ?? []).map((a) => ({ ...a })),
|
|
213
224
|
baseline: task.baseline,
|
|
214
|
-
description: task.
|
|
225
|
+
description: task.title,
|
|
215
226
|
doc_coverage: task.docCoverage,
|
|
216
|
-
featureArea: task.
|
|
227
|
+
featureArea: task.area ?? "",
|
|
217
228
|
id: task.id,
|
|
218
229
|
vars: {
|
|
219
230
|
docs: hasDocs ? `file://contexts/canonical/${task.id}.md` : "",
|
|
220
|
-
task: task.
|
|
221
|
-
...task.
|
|
231
|
+
task: task.prompt?.text ?? "",
|
|
232
|
+
...task.prompt?.vars,
|
|
222
233
|
},
|
|
223
234
|
};
|
|
224
235
|
}
|
|
225
236
|
/**
|
|
226
|
-
* Expand an array of
|
|
227
|
-
* Promptfoo-compatible test entries. This is the TaskSource-aware
|
|
228
|
-
* of loadAndExpandTasks() — it skips YAML file I/O and works
|
|
229
|
-
* the
|
|
237
|
+
* Expand an array of LiteracyTaskDefinition[] (from any TaskSource adapter)
|
|
238
|
+
* into Promptfoo-compatible test entries. This is the TaskSource-aware
|
|
239
|
+
* counterpart of loadAndExpandTasks() — it skips YAML file I/O and works
|
|
240
|
+
* directly with the domain type.
|
|
230
241
|
*
|
|
231
|
-
* @param tasks -
|
|
242
|
+
* @param tasks - Literacy task definitions from any TaskSource adapter
|
|
232
243
|
* @param rootDir - Eval package root (needed to load rubric templates)
|
|
233
244
|
* @param mode - Expansion mode: LiteracyVariant.STANDARD (gold + baseline; the default) or LiteracyVariant.AGENTIC (gold only)
|
|
234
245
|
* @returns Expanded test entries and statistics
|
|
235
246
|
*/
|
|
236
|
-
export function expandTaskDefinitions(tasks, rootDir, mode =
|
|
247
|
+
export function expandTaskDefinitions(tasks, rootDir, mode = LiteracyVariant.STANDARD) {
|
|
237
248
|
const rubricConfig = loadRubricTemplates(rootDir);
|
|
238
249
|
const entries = [];
|
|
239
250
|
for (const task of tasks) {
|
|
240
|
-
const single =
|
|
251
|
+
const single = literacyTaskToSingle(task);
|
|
241
252
|
entries.push(...expandTask(single, rubricConfig, mode));
|
|
242
253
|
}
|
|
243
254
|
return {
|
|
@@ -314,7 +325,7 @@ export function isTemplatedAssert(entry) {
|
|
|
314
325
|
*
|
|
315
326
|
* Returns the expanded entries grouped by source file.
|
|
316
327
|
*/
|
|
317
|
-
export function loadAndExpandTasks(rootDir, filter, mode =
|
|
328
|
+
export function loadAndExpandTasks(rootDir, filter, mode = LiteracyVariant.STANDARD, logger) {
|
|
318
329
|
const log = logger ?? new ConsoleLogger();
|
|
319
330
|
const tasksDir = resolve(rootDir, "tasks");
|
|
320
331
|
if (!existsSync(tasksDir)) {
|
|
@@ -420,18 +431,13 @@ export function loadAndExpandTasks(rootDir, filter, mode = "baseline", logger) {
|
|
|
420
431
|
export function loadRubricTemplates(rootDir) {
|
|
421
432
|
if (cachedRubricConfig)
|
|
422
433
|
return cachedRubricConfig;
|
|
423
|
-
const
|
|
424
|
-
|
|
425
|
-
throw new Error(`config/rubrics.yaml not found at ${filePath}`);
|
|
426
|
-
}
|
|
427
|
-
const raw = readFileSync(filePath, "utf-8");
|
|
428
|
-
const parsed = load(raw);
|
|
429
|
-
const result = RubricConfigSchema.safeParse(parsed);
|
|
434
|
+
const { data } = loadConfigFile("rubrics", rootDir);
|
|
435
|
+
const result = RubricConfigSchema.safeParse(data);
|
|
430
436
|
if (!result.success) {
|
|
431
437
|
const messages = result.error.issues
|
|
432
438
|
.map((i) => ` [${i.path.join(".")}]: ${i.message}`)
|
|
433
439
|
.join("\n");
|
|
434
|
-
throw new Error(`Invalid config/rubrics
|
|
440
|
+
throw new Error(`Invalid config/rubrics:\n${messages}`);
|
|
435
441
|
}
|
|
436
442
|
cachedRubricConfig = result.data;
|
|
437
443
|
return result.data;
|
|
@@ -34,7 +34,7 @@ export declare function buildGapAnalysisReport(failureModeReport: FailureModeRep
|
|
|
34
34
|
*
|
|
35
35
|
* @param failureModeReport - Classified failure modes from Phase 3a
|
|
36
36
|
* @param scores - Per-area feature scores
|
|
37
|
-
* @param weights - Dimension weights (defaults to rubrics
|
|
37
|
+
* @param weights - Dimension weights (defaults to rubrics config weights)
|
|
38
38
|
* @returns Gap estimates sorted by priority (highest first)
|
|
39
39
|
*/
|
|
40
40
|
export declare function estimateImpact(failureModeReport: FailureModeReport, scores: FeatureScore[], weights?: Record<string, number>): GapEstimate[];
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
// ---------------------------------------------------------------------------
|
|
19
19
|
// Constants
|
|
20
20
|
// ---------------------------------------------------------------------------
|
|
21
|
-
/** Default dimension weights (must match rubrics
|
|
21
|
+
/** Default dimension weights (must match rubrics config) */
|
|
22
22
|
const DEFAULT_WEIGHTS = {
|
|
23
23
|
"code-correctness": 0.25,
|
|
24
24
|
"doc-coverage": 0.25,
|
|
@@ -71,7 +71,7 @@ export function buildGapAnalysisReport(failureModeReport, scores, weights) {
|
|
|
71
71
|
*
|
|
72
72
|
* @param failureModeReport - Classified failure modes from Phase 3a
|
|
73
73
|
* @param scores - Per-area feature scores
|
|
74
|
-
* @param weights - Dimension weights (defaults to rubrics
|
|
74
|
+
* @param weights - Dimension weights (defaults to rubrics config weights)
|
|
75
75
|
* @returns Gap estimates sorted by priority (highest first)
|
|
76
76
|
*/
|
|
77
77
|
export function estimateImpact(failureModeReport, scores, weights = DEFAULT_WEIGHTS) {
|
|
@@ -1,6 +1,21 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* pipeline/generate-configs.ts
|
|
3
3
|
*
|
|
4
|
+
* @deprecated This is the LEGACY compilation path. New code should use the
|
|
5
|
+
* config compiler pipeline instead:
|
|
6
|
+
*
|
|
7
|
+
* import { compileLiteracyTasks } from "./compiler/literacy-bridge.js"
|
|
8
|
+
* import { buildTaskGraph, compileToPromptfoo } from "./compiler/index.js"
|
|
9
|
+
*
|
|
10
|
+
* This file is retained behind the `--legacy-compiler` CLI flag as an
|
|
11
|
+
* emergency fallback during the migration period. It will be removed once
|
|
12
|
+
* the new compiler has been validated in production.
|
|
13
|
+
*
|
|
14
|
+
* @see packages/eval/src/pipeline/compiler/ — the new compiler pipeline
|
|
15
|
+
* @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
|
|
16
|
+
*
|
|
17
|
+
* ---
|
|
18
|
+
*
|
|
4
19
|
* Reads config/models.yaml (the central model registry) and generates all
|
|
5
20
|
* promptfoo config files with the correct provider entries.
|
|
6
21
|
*
|
|
@@ -18,7 +33,7 @@
|
|
|
18
33
|
* @see config/models.yaml — the central model registry
|
|
19
34
|
* @see docs/exec-plans/eliminate-lib-layer.md
|
|
20
35
|
*/
|
|
21
|
-
import { type
|
|
36
|
+
import { type LiteracyTaskDefinition, type Logger } from "../_vendor/ailf-core/index.d.ts";
|
|
22
37
|
import type { FilterOptions } from "./types.js";
|
|
23
38
|
import { type ResolvedSourceConfig } from "../sources.js";
|
|
24
39
|
export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, } from "../_vendor/ailf-core/index.d.ts";
|
|
@@ -41,7 +56,10 @@ interface LoadedPrompts {
|
|
|
41
56
|
raw: string;
|
|
42
57
|
};
|
|
43
58
|
}
|
|
44
|
-
/**
|
|
59
|
+
/**
|
|
60
|
+
* Load prompt templates. Uses handler-owned literacy templates as defaults,
|
|
61
|
+
* with config/prompts.ts as an override layer for user customization.
|
|
62
|
+
*/
|
|
45
63
|
export declare function loadPrompts(rootDir: string): LoadedPrompts;
|
|
46
64
|
/** Options for the generateConfigs function. */
|
|
47
65
|
export interface GenerateConfigsOptions {
|
|
@@ -59,10 +77,10 @@ export interface GenerateConfigsOptions {
|
|
|
59
77
|
searchMode?: string;
|
|
60
78
|
/** Documentation source name (e.g., "branch", "local") */
|
|
61
79
|
source?: string;
|
|
62
|
-
/** Pre-loaded task definitions from a TaskSource adapter.
|
|
80
|
+
/** Pre-loaded literacy task definitions from a TaskSource adapter.
|
|
63
81
|
* When provided, expandTaskDefinitions() is used instead of
|
|
64
82
|
* loadAndExpandTasks() (which reads from tasks/*.yaml files). */
|
|
65
|
-
tasks?:
|
|
83
|
+
tasks?: LiteracyTaskDefinition[];
|
|
66
84
|
}
|
|
67
85
|
/**
|
|
68
86
|
* Generate Promptfoo config files from models.yaml + task definitions.
|