@sanity/ailf 0.5.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/features.ts +23 -0
- package/config/models.ts +83 -0
- package/config/prompts.ts +16 -0
- package/config/rubrics.ts +225 -0
- package/config/schedules.ts +47 -0
- package/config/sinks.ts +37 -0
- package/config/sources.ts +21 -0
- package/config/thresholds.ts +61 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
- package/dist/_vendor/ailf-core/config-helpers.js +150 -0
- package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
- package/dist/_vendor/ailf-core/env-helper.js +45 -0
- package/dist/_vendor/ailf-core/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/index.js +5 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
- package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
- package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -29
- package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -8
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
- package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/services/index.js +2 -1
- package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
- package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
- package/dist/_vendor/ailf-core/services/scoring.js +25 -15
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
- package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
- package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
- package/dist/_vendor/ailf-core/types/index.js +8 -1
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
- package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
- package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
- package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
- package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
- package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
- package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
- package/dist/_vendor/ailf-core/types/trace.js +18 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
- package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
- package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
- package/dist/_vendor/ailf-shared/index.d.ts +0 -1
- package/dist/_vendor/ailf-shared/index.js +0 -1
- package/dist/adapters/api-client/build-request.js +14 -13
- package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
- package/dist/adapters/config-sources/file-config-adapter.js +38 -12
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
- package/dist/adapters/config-sources/ts-config-loader.js +133 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
- package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
- package/dist/adapters/task-sources/index.d.ts +1 -0
- package/dist/adapters/task-sources/index.js +1 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
- package/dist/adapters/task-sources/repo-task-source.js +69 -16
- package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
- package/dist/adapters/task-sources/task-file-loader.js +83 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
- package/dist/adapters/task-sources/yaml-task-source.js +19 -16
- package/dist/cli.js +0 -2
- package/dist/commands/baseline.js +4 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/coverage-audit.js +7 -1
- package/dist/commands/explain-handler.js +25 -23
- package/dist/commands/fetch-docs.js +3 -2
- package/dist/commands/generate-configs.js +1 -1
- package/dist/commands/interactive.js +11 -7
- package/dist/commands/pipeline-action.d.ts +2 -0
- package/dist/commands/pipeline-action.js +16 -6
- package/dist/commands/pipeline.d.ts +1 -0
- package/dist/commands/pipeline.js +4 -2
- package/dist/commands/pr-comment.js +1 -1
- package/dist/commands/publish.js +2 -2
- package/dist/commands/readiness-report.js +13 -6
- package/dist/composition-root.d.ts +1 -1
- package/dist/composition-root.js +67 -4
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +24 -6
- package/dist/orchestration/steps/calculate-scores-step.js +24 -11
- package/dist/orchestration/steps/fetch-docs-step.js +6 -4
- package/dist/orchestration/steps/gap-analysis-step.js +8 -7
- package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
- package/dist/orchestration/steps/generate-configs-step.js +245 -51
- package/dist/orchestration/steps/grader-consistency-step.js +7 -4
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/readiness-step.js +5 -6
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
- package/dist/orchestration/steps/run-eval-step.js +8 -7
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/cache.js +36 -8
- package/dist/pipeline/calculate-scores.d.ts +2 -4
- package/dist/pipeline/calculate-scores.js +43 -113
- package/dist/pipeline/checks.js +2 -2
- package/dist/pipeline/compare.js +8 -8
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
- package/dist/pipeline/compiler/assertion-mapper.js +175 -0
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
- package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
- package/dist/pipeline/compiler/config-loader.d.ts +56 -0
- package/dist/pipeline/compiler/config-loader.js +111 -0
- package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
- package/dist/pipeline/compiler/fixture-resolver.js +113 -0
- package/dist/pipeline/compiler/hash.d.ts +11 -0
- package/dist/pipeline/compiler/hash.js +18 -0
- package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
- package/dist/pipeline/compiler/ignore-fields.js +113 -0
- package/dist/pipeline/compiler/index.d.ts +29 -0
- package/dist/pipeline/compiler/index.js +45 -0
- package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
- package/dist/pipeline/compiler/literacy-bridge.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
- package/dist/pipeline/compiler/presets/index.d.ts +9 -0
- package/dist/pipeline/compiler/presets/index.js +8 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
- package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
- package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
- package/dist/pipeline/compiler/provider-assembler.js +137 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
- package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
- package/dist/pipeline/compiler/sandbox/index.js +11 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
- package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
- package/dist/pipeline/compiler/scoring-bridge.js +114 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
- package/dist/pipeline/compiler/task-graph-builder.js +291 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
- package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
- package/dist/pipeline/compiler/telemetry/index.js +19 -0
- package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
- package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
- package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
- package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
- package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
- package/dist/pipeline/compiler/variable-resolver.js +115 -0
- package/dist/pipeline/coverage-audit.d.ts +15 -5
- package/dist/pipeline/coverage-audit.js +41 -22
- package/dist/pipeline/eval-constants.d.ts +16 -6
- package/dist/pipeline/eval-constants.js +25 -4
- package/dist/pipeline/eval-fingerprint.d.ts +2 -2
- package/dist/pipeline/eval-fingerprint.js +8 -9
- package/dist/pipeline/expand-tasks.d.ts +19 -10
- package/dist/pipeline/expand-tasks.js +34 -28
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +2 -2
- package/dist/pipeline/generate-configs.d.ts +22 -4
- package/dist/pipeline/generate-configs.js +53 -24
- package/dist/pipeline/grader-api.d.ts +3 -3
- package/dist/pipeline/grader-api.js +5 -12
- package/dist/pipeline/grader-compare-runner.js +20 -27
- package/dist/pipeline/grader-comparison.d.ts +4 -8
- package/dist/pipeline/grader-comparison.js +11 -17
- package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
- package/dist/pipeline/grader-consistency-runner.js +16 -20
- package/dist/pipeline/grader-consistency.d.ts +6 -10
- package/dist/pipeline/grader-consistency.js +13 -32
- package/dist/pipeline/grader-sensitivity-runner.js +7 -5
- package/dist/pipeline/grader-sensitivity.d.ts +2 -6
- package/dist/pipeline/grader-sensitivity.js +10 -10
- package/dist/pipeline/grader-validate-runner.js +7 -5
- package/dist/pipeline/grader-validation.d.ts +2 -6
- package/dist/pipeline/grader-validation.js +14 -22
- package/dist/pipeline/map-request-to-config.js +6 -1
- package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
- package/dist/pipeline/mirror-repo-tasks.js +16 -15
- package/dist/pipeline/normalize-mode.d.ts +49 -0
- package/dist/pipeline/normalize-mode.js +64 -0
- package/dist/pipeline/plan.d.ts +5 -2
- package/dist/pipeline/plan.js +134 -78
- package/dist/pipeline/pr-comment.js +2 -0
- package/dist/pipeline/profile-resolution.d.ts +22 -14
- package/dist/pipeline/profile-resolution.js +41 -19
- package/dist/pipeline/provenance.d.ts +2 -2
- package/dist/pipeline/provenance.js +12 -17
- package/dist/pipeline/release-report.js +4 -4
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/rubric-loader.d.ts +20 -0
- package/dist/pipeline/rubric-loader.js +37 -0
- package/dist/pipeline/validate.d.ts +4 -4
- package/dist/pipeline/validate.js +64 -53
- package/dist/schedules/loader.js +18 -8
- package/dist/scripts/migrate-task-mode.d.ts +24 -0
- package/dist/scripts/migrate-task-mode.js +85 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +15 -15
- package/dist/sinks/loader.js +5 -7
- package/dist/sources.d.ts +7 -7
- package/dist/sources.js +22 -24
- package/dist/webhook/dispatch.js +2 -1
- package/package.json +6 -3
- package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
- package/tasks/literacy/frameworks.task.ts +128 -0
- package/tasks/literacy/functions.task.ts +69 -0
- package/tasks/literacy/groq.task.ts +258 -0
- package/tasks/literacy/nextjs-live.task.ts +75 -0
- package/tasks/literacy/studio-setup.task.ts +131 -0
- package/tasks/literacy/visual-editing.task.ts +146 -0
- package/config/features.yaml +0 -116
- package/config/models.yaml +0 -116
- package/config/prompts.yaml +0 -75
- package/config/rubrics.yaml +0 -81
- package/config/schedules.yaml +0 -43
- package/config/sinks.yaml +0 -54
- package/config/sources.yaml +0 -51
- package/config/thresholds.yaml +0 -49
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fixture resolver — resolves fixture references into content for compilation.
|
|
3
|
+
*
|
|
4
|
+
* Handles document fixtures (fetched from Sanity or local files),
|
|
5
|
+
* file fixtures (read from disk), and inline fixtures (embedded in
|
|
6
|
+
* task definitions). Resolved content is injected into the TaskNode's
|
|
7
|
+
* VariableEnvelope for the compiler to use.
|
|
8
|
+
*
|
|
9
|
+
* Currently supports the existing fixture patterns:
|
|
10
|
+
* - `file://contexts/canonical/<id>.md` → read from local fs
|
|
11
|
+
* - Inline `vars.docs` strings → used as-is
|
|
12
|
+
* - Canonical doc references → resolved by DocFetcher port
|
|
13
|
+
*
|
|
14
|
+
* Future phases will add URI scheme resolution (repo://, sanity://, etc.)
|
|
15
|
+
* as described in the fixtures-artifacts design doc.
|
|
16
|
+
*
|
|
17
|
+
* @see docs/design-docs/architecture-overhaul/fixtures-artifacts.md
|
|
18
|
+
* @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
|
|
19
|
+
*/
|
|
20
|
+
import { existsSync, readFileSync } from "fs";
|
|
21
|
+
import { resolve } from "path";
|
|
22
|
+
import { simpleHash } from "./hash.js";
|
|
23
|
+
/**
 * Resolve the fixtures referenced by a task definition.
 *
 * Every string variable whose value starts with `file://` is loaded from disk
 * (relative to `options.rootDir`) and replaced by the file content; the
 * variable's provenance entry is rewritten to point at the fixture. When the
 * task carries `context.docs`, a placeholder fixture with `content: null` is
 * registered for it.
 *
 * The inputs are not mutated: `values` and `provenance` are shallow-copied
 * before being updated, while `declarations` is passed through by reference.
 *
 * @param task - Task definition; `id` and the optional `context.docs` are read.
 * @param currentVars - Envelope with `declarations`, `provenance` and `values`.
 * @param options - Resolver options; `rootDir` is read when a `file://` value exists.
 * @returns `{ fixtures, updatedVars, warnings }` where `fixtures` is a Map keyed
 *          by fixture id and `warnings` collects one message per failed file read.
 */
export function resolveTaskFixtures(task, currentVars, options) {
    const warnings = [];
    const fixtures = new Map();
    const values = { ...currentVars.values };
    const provenance = { ...currentVars.provenance };
    const filePrefix = "file://";

    // Collect the file-backed variables first, then resolve them one by one.
    const fileBackedVars = Object.entries(values).filter(([, candidate]) => typeof candidate === "string" && candidate.startsWith(filePrefix));
    for (const [varName, fileRef] of fileBackedVars) {
        const outcome = resolveFileRef(fileRef, options.rootDir);
        if (!outcome.ok) {
            // A failed read is reported but never aborts resolution.
            warnings.push(outcome.error);
            continue;
        }
        const contentHash = simpleHash(outcome.content);
        values[varName] = outcome.content;
        provenance[varName] = {
            hash: contentHash,
            resolvedAt: new Date().toISOString(),
            source: { fixtureId: fileRef, type: "fixture" },
        };
        fixtures.set(fileRef, {
            content: outcome.content,
            contentHash,
            id: fileRef,
            name: fileRef.slice(filePrefix.length),
            type: "fetched",
        });
    }

    // Canonical docs are not loaded here: only a placeholder is registered so
    // that the doc context can be injected later, at eval time.
    const hasContextDocs = "context" in task && task.context?.docs;
    const contextDocs = hasContextDocs ? task.context.docs : [];
    if (contextDocs.length > 0) {
        const placeholderId = `canonical-docs:${task.id}`;
        fixtures.set(placeholderId, {
            content: null, // Deferred — resolved at eval time
            id: placeholderId,
            name: `Canonical docs for ${task.id}`,
            type: "fetched",
        });
    }

    const updatedVars = {
        declarations: currentVars.declarations,
        provenance,
        values,
    };
    return { fixtures, updatedVars, warnings };
}
|
|
84
|
+
/**
 * Resolve a `file://` reference to file content.
 *
 * The reference is interpreted relative to `rootDir` and must stay inside it.
 * Failures are reported through the result object rather than thrown.
 *
 * @param fileRef - Reference such as `file://contexts/canonical/<id>.md`.
 * @param rootDir - Directory the reference is resolved against.
 * @returns `{ ok: true, content }` on success, otherwise `{ ok: false, error }`
 *          with a message describing a traversal attempt, a missing file, or
 *          a read failure.
 */
function resolveFileRef(fileRef, rootDir) {
    const relativePath = fileRef.replace("file://", "");
    const baseDir = resolve(rootDir);
    const absolutePath = resolve(baseDir, relativePath);
    // Path containment: prevent file://../../etc/passwd from reading outside
    // rootDir. Walking up the parent chain avoids hard-coding a separator, so
    // the check works with "\" separators and when rootDir is the filesystem
    // root (where appending "/" to the base would never match).
    let isContained = false;
    let ancestor = absolutePath;
    for (;;) {
        if (ancestor === baseDir) {
            isContained = true;
            break;
        }
        const parent = resolve(ancestor, "..");
        if (parent === ancestor) {
            // Reached the filesystem root without meeting baseDir.
            break;
        }
        ancestor = parent;
    }
    if (!isContained) {
        return {
            ok: false,
            error: `Path traversal detected: "${fileRef}" resolves outside rootDir "${rootDir}"`,
        };
    }
    if (!existsSync(absolutePath)) {
        return {
            ok: false,
            error: `Fixture file not found: ${absolutePath} (referenced as ${fileRef})`,
        };
    }
    try {
        const content = readFileSync(absolutePath, "utf-8");
        return { ok: true, content };
    }
    catch (err) {
        // e.g. the path exists but is a directory, or is not readable.
        const msg = err instanceof Error ? err.message : String(err);
        return { ok: false, error: `Failed to read fixture ${fileRef}: ${msg}` };
    }
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FNV-1a 32-bit hash — fast, non-cryptographic content hash.
|
|
3
|
+
*
|
|
4
|
+
* Used for cache keys, content fingerprinting, and provenance tracking.
|
|
5
|
+
* Returns an 8-character zero-padded hex string.
|
|
6
|
+
*
|
|
7
|
+
* Shared across fixture-resolver, variable-resolver, and trace-store
|
|
8
|
+
* to ensure consistent hash behavior. Uses unsigned right shift (`>>> 0`)
|
|
9
|
+
* to keep the hash in unsigned 32-bit integer range.
|
|
10
|
+
*/
|
|
11
|
+
// The implementation folds in one UTF-16 code unit per step (it reads
// `content.charCodeAt(i)`), not one UTF-8 byte, and formats the result with
// `toString(16).padStart(8, "0")`.
export declare function simpleHash(content: string): string;
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * FNV-1a 32-bit hash — fast, non-cryptographic content hash.
 *
 * Used for cache keys, content fingerprinting, and provenance tracking.
 * Returns an 8-character zero-padded hex string.
 *
 * Shared across fixture-resolver, variable-resolver, and trace-store
 * to ensure consistent hash behavior.
 *
 * The input is consumed one UTF-16 code unit at a time (`charCodeAt`), so for
 * ASCII input the result matches the published FNV-1a 32-bit test vectors.
 *
 * @param content - String to hash.
 * @returns Lower-case hex digest, always 8 characters long.
 */
export function simpleHash(content) {
    let hash = 0x811c9dc5; // FNV-1a 32-bit offset basis
    for (let i = 0; i < content.length; i++) {
        hash ^= content.charCodeAt(i);
        // Math.imul multiplies exactly modulo 2^32. A plain `hash * prime`
        // can exceed 2^53 and is rounded as a double before truncation,
        // which corrupts the low bits of the hash.
        hash = Math.imul(hash, 0x01000193); // FNV prime
    }
    // `>>> 0` reinterprets the signed 32-bit result as unsigned before formatting.
    return (hash >>> 0).toString(16).padStart(8, "0");
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ignoreFields — strips non-deterministic fields before assertion comparison.
|
|
3
|
+
*
|
|
4
|
+
* For tool outputs that contain timestamps, UUIDs, temp paths, or other
|
|
5
|
+
* non-deterministic values, assertions need a way to exclude specific
|
|
6
|
+
* fields from comparison. This module provides field stripping using
|
|
7
|
+
* dot-notation paths.
|
|
8
|
+
*
|
|
9
|
+
* Usage in task definitions:
|
|
10
|
+
* ```typescript
|
|
11
|
+
* assertions: [
|
|
12
|
+
* {
|
|
13
|
+
* type: "tool-output-matches",
|
|
14
|
+
* value: { title: "Hello" },
|
|
15
|
+
* ignoreFields: ["metadata.createdAt", "result.id", "_rev"],
|
|
16
|
+
* },
|
|
17
|
+
* ]
|
|
18
|
+
* ```
|
|
19
|
+
*
|
|
20
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
21
|
+
*/
|
|
22
|
+
/**
|
|
23
|
+
* Strip specified fields from an object using dot-notation paths.
|
|
24
|
+
*
|
|
25
|
+
* @param obj - The object to strip fields from (not mutated)
|
|
26
|
+
* @param fields - Dot-notation field paths to remove
|
|
27
|
+
* @returns A new object with the specified fields removed
|
|
28
|
+
*
|
|
29
|
+
* @example
|
|
30
|
+
* ```typescript
|
|
31
|
+
* stripFields(
|
|
32
|
+
* { title: "Hello", metadata: { createdAt: "2024-01-01", author: "Alice" } },
|
|
33
|
+
* ["metadata.createdAt"]
|
|
34
|
+
* )
|
|
35
|
+
* // => { title: "Hello", metadata: { author: "Alice" } }
|
|
36
|
+
* ```
|
|
37
|
+
*/
|
|
38
|
+
// The implementation returns `obj` itself (no copy) when `fields` is empty or
// `obj` is null, undefined, or not an object; otherwise the copy is produced
// with a JSON.parse(JSON.stringify(...)) round-trip.
export declare function stripFields(obj: unknown, fields: string[]): unknown;
|
|
39
|
+
/**
|
|
40
|
+
* Strip specified fields from both actual and expected values,
|
|
41
|
+
* then compare them.
|
|
42
|
+
*
|
|
43
|
+
* @returns true if the objects are equal after stripping
|
|
44
|
+
*/
|
|
45
|
+
// Equality is decided by comparing the JSON.stringify output of the two
// stripped values, so the order of object keys affects the result.
export declare function compareWithIgnoredFields(actual: unknown, expected: unknown, ignoreFields: string[]): boolean;
|
|
46
|
+
/**
|
|
47
|
+
* Generate a Promptfoo-compatible JavaScript assertion that applies
|
|
48
|
+
* ignoreFields stripping before comparison.
|
|
49
|
+
*
|
|
50
|
+
* This wraps a comparison assertion with field stripping logic,
|
|
51
|
+
* producing a self-contained JS assertion string.
|
|
52
|
+
*/
|
|
53
|
+
// With an empty `ignoreFields` the implementation returns `comparisonCode`
// unchanged. Otherwise the result is a `stripFields` helper definition, then a
// `const __ignoreFields = [...]` line, then `comparisonCode`.
export declare function buildIgnoreFieldsWrapper(comparisonCode: string, ignoreFields: string[]): string;
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ignoreFields — strips non-deterministic fields before assertion comparison.
|
|
3
|
+
*
|
|
4
|
+
* For tool outputs that contain timestamps, UUIDs, temp paths, or other
|
|
5
|
+
* non-deterministic values, assertions need a way to exclude specific
|
|
6
|
+
* fields from comparison. This module provides field stripping using
|
|
7
|
+
* dot-notation paths.
|
|
8
|
+
*
|
|
9
|
+
* Usage in task definitions:
|
|
10
|
+
* ```typescript
|
|
11
|
+
* assertions: [
|
|
12
|
+
* {
|
|
13
|
+
* type: "tool-output-matches",
|
|
14
|
+
* value: { title: "Hello" },
|
|
15
|
+
* ignoreFields: ["metadata.createdAt", "result.id", "_rev"],
|
|
16
|
+
* },
|
|
17
|
+
* ]
|
|
18
|
+
* ```
|
|
19
|
+
*
|
|
20
|
+
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
21
|
+
*/
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Public API
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
/**
 * Remove the given dot-notation paths from a copy of `obj`.
 *
 * When there is nothing to strip (`fields` missing or empty) or `obj` is not
 * an object (null, undefined, or a primitive), `obj` is returned as-is.
 * Otherwise the input is copied through a JSON round-trip, so the caller's
 * object is never mutated.
 *
 * @param obj - The value to strip fields from (not mutated)
 * @param fields - Dot-notation field paths to remove
 * @returns `obj` itself on the early-return paths, else a stripped copy
 *
 * @example
 * ```typescript
 * stripFields(
 *   { title: "Hello", metadata: { createdAt: "2024-01-01", author: "Alice" } },
 *   ["metadata.createdAt"]
 * )
 * // => { title: "Hello", metadata: { author: "Alice" } }
 * ```
 */
export function stripFields(obj, fields) {
    const nothingToStrip = !fields || fields.length === 0;
    const notAnObject = obj === null || obj === undefined || typeof obj !== "object";
    if (nothingToStrip || notAnObject) {
        return obj;
    }
    // Work on a JSON round-trip copy so the input stays untouched.
    const copy = JSON.parse(JSON.stringify(obj));
    for (const fieldPath of fields) {
        const segments = fieldPath.split(".");
        removeFieldByPath(copy, segments);
    }
    return copy;
}
|
|
55
|
+
/**
 * Compare two values after removing the ignored fields from both.
 *
 * Each side is passed through `stripFields` and serialized with
 * `JSON.stringify`; the values are considered equal when the two serialized
 * forms are identical. Because the comparison is on serialized text, the
 * order of object keys affects the result.
 *
 * @param actual - Observed value
 * @param expected - Expected value
 * @param ignoreFields - Dot-notation paths excluded from the comparison
 * @returns true if the serialized, stripped values are identical
 */
export function compareWithIgnoredFields(actual, expected, ignoreFields) {
    const [actualJson, expectedJson] = [actual, expected].map((side) => JSON.stringify(stripFields(side, ignoreFields)));
    return actualJson === expectedJson;
}
|
|
66
|
+
/**
 * Generate a Promptfoo-compatible JavaScript assertion that applies
 * ignoreFields stripping before comparison.
 *
 * The returned source is self-contained: it defines a local `stripFields`
 * helper, then a `__ignoreFields` constant holding the given paths, and ends
 * with `comparisonCode`, which may reference both names.
 *
 * The generated helper only descends through an object's own properties, so a
 * path such as `__proto__.x` cannot reach and modify the shared
 * `Object.prototype`.
 *
 * @param comparisonCode - JavaScript source of the comparison to wrap.
 * @param ignoreFields - Dot-notation paths to strip; when missing or empty,
 *                       `comparisonCode` is returned unchanged.
 * @returns JavaScript source for the wrapped assertion.
 */
export function buildIgnoreFieldsWrapper(comparisonCode, ignoreFields) {
    // Tolerate a missing list the same way stripFields does.
    if (!ignoreFields || ignoreFields.length === 0) {
        return comparisonCode;
    }
    const stripFn = [
        "function stripFields(obj, fields) {",
        " if (!obj || typeof obj !== 'object') return obj;",
        " const clone = JSON.parse(JSON.stringify(obj));",
        " const hasOwn = Object.prototype.hasOwnProperty;",
        " for (const field of fields) {",
        " const parts = field.split('.');",
        " let current = clone;",
        " for (let i = 0; i < parts.length - 1; i++) {",
        " if (!current || typeof current !== 'object' || !hasOwn.call(current, parts[i])) {",
        " current = undefined;",
        " break;",
        " }",
        " current = current[parts[i]];",
        " }",
        " if (current && typeof current === 'object') {",
        " delete current[parts[parts.length - 1]];",
        " }",
        " }",
        " return clone;",
        "}",
        "",
    ].join("\n");
    // JSON.stringify yields a valid JavaScript array literal for the paths.
    return `${stripFn}\nconst __ignoreFields = ${JSON.stringify(ignoreFields)};\n${comparisonCode}`;
}
|
|
96
|
+
// ---------------------------------------------------------------------------
|
|
97
|
+
// Internal helpers
|
|
98
|
+
// ---------------------------------------------------------------------------
|
|
99
|
+
/**
 * Delete the property addressed by `path` from `obj`, in place.
 *
 * Intermediate segments are followed only through own properties. Inherited
 * ones (`__proto__`, `constructor`, ...) are never traversed, so a path can
 * not reach and modify a shared prototype object. Paths whose intermediate
 * segments are missing or not objects are ignored.
 *
 * @param obj - Object (or array) to modify.
 * @param path - Path segments, e.g. `["metadata", "createdAt"]`.
 */
function removeFieldByPath(obj, path) {
    if (path.length === 0) {
        return;
    }
    const [head, ...rest] = path;
    if (rest.length === 0) {
        // Base case: delete the field (a no-op when it is not an own property).
        delete obj[head];
        return;
    }
    // Refuse to descend into anything that is not obj's own property.
    if (!Object.prototype.hasOwnProperty.call(obj, head)) {
        return;
    }
    // Recursive case: traverse into nested object
    const child = obj[head];
    if (child !== null && child !== undefined && typeof child === "object") {
        removeFieldByPath(child, rest);
    }
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Config compiler — the heart of the new architecture.
|
|
3
|
+
*
|
|
4
|
+
* Converts task definitions from any source into a TaskGraph IR,
|
|
5
|
+
* then compiles the graph into Promptfoo YAML configuration.
|
|
6
|
+
*
|
|
7
|
+
* This module coexists with the existing `generate-configs.ts` path.
|
|
8
|
+
* Phase 7 will migrate callers to use the compiler exclusively.
|
|
9
|
+
*
|
|
10
|
+
* @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
|
|
11
|
+
*/
|
|
12
|
+
export { buildTaskGraph, detectCycle, type TaskGraphBuildOptions, type TaskGraphBuildResult, } from "./task-graph-builder.js";
|
|
13
|
+
export { compileToPromptfoo, type CompilationResult, type CompiledPromptfooConfig, type PromptfooCompilerOptions, type PromptfooPrompt, type PromptfooProvider, type PromptfooTestCase, } from "./promptfoo-compiler.js";
|
|
14
|
+
export { isAssertionCompatibleWithMode, isValidAssertionType, mapAssertions, type AssertionMapperOptions, type PromptfooAssertion, } from "./assertion-mapper.js";
|
|
15
|
+
export { resolveTaskFixtures, type FixtureResolutionResult, type FixtureResolverOptions, } from "./fixture-resolver.js";
|
|
16
|
+
export { createEnvelope, resolveVariables, type VariableResolutionResult, type VariableResolverOptions, } from "./variable-resolver.js";
|
|
17
|
+
export { buildMCPAssertions, compileAgentHarnessTask, compileLiteracyTask, compileKnowledgeProbeTask, compileMCPTask, validateAgentHarnessTask, validateLiteracyTask, validateKnowledgeProbeTask, validateMCPTask, type AgentHarnessCompileOptions, type AgentHarnessCompileResult, type AgentHarnessValidationError, type LiteracyCompileOptions, type LiteracyCompileResult, type LiteracyValidationError, type KnowledgeProbeCompileOptions, type KnowledgeProbeCompileResult, type KnowledgeProbeMetadata, type KnowledgeProbeValidationError, type MCPAssertionContext, type MCPCompileOptions, type MCPCompileResult, type MCPValidationError, type PromptfooExtension, type SandboxConfigMeta, } from "./mode-handlers/index.js";
|
|
18
|
+
export { createSandboxStrategy, DockerSandboxStrategy, GitWorktreeSandboxStrategy, selectSandboxStrategy, TempDirSandboxStrategy, type SandboxArtifacts, type SandboxInfo, type SandboxProvisionOptions, type SandboxSelectionResult, type SandboxStrategy, type SandboxType, } from "./sandbox/index.js";
|
|
19
|
+
export { provisionFixtures, type FixtureRef, type FixtureTransform, type ProvisionedFixture, type ProvisioningOptions, type ProvisioningResult, } from "./sandbox/fixture-provisioner.js";
|
|
20
|
+
export { loadModelsAndProviders, type AssembledProviders, type LiteracyVariantProviders, type ModelsAndProviders, } from "./provider-assembler.js";
|
|
21
|
+
export { writeCompiledLiteracyConfigs, type WriteCompiledConfigOptions, } from "./compiler-to-yaml.js";
|
|
22
|
+
export { compileLiteracyTasks, compareCompilerOutputs, type ComparisonDiscrepancy, type ComparisonResult, type LegacyEntry, type LiteracyBridgeOptions, type LiteracyBridgeResult, } from "./literacy-bridge.js";
|
|
23
|
+
export { checkBudget, classifyToolCall, classifyToolCalls, collectTrace, computeCost, createRedactionConfig, DEFAULT_REDACTION_RULES, estimateRunCost, extractTraceSummary, LocalTraceStore, lookupPricing, mergeTraces, redactTrace, type ActualCost, type BudgetCheckResult, type BudgetConfig, type CostEstimate, type ModelPricing, type ProviderResponse, type RawToolCall, type RedactionConfig, type RedactionResult, type RedactionRule, type TraceCollectorOptions, type TraceStore, type TraceStoreResult, type TraceSummary, } from "./telemetry/index.js";
|
|
24
|
+
export { registerSanityLiteracyPreset, sanityLiteracyPreset, } from "./presets/index.js";
|
|
25
|
+
export { buildIgnoreFieldsWrapper, compareWithIgnoredFields, stripFields, } from "./ignore-fields.js";
|
|
26
|
+
export { simpleHash } from "./hash.js";
|
|
27
|
+
export { scoreTestGroup, type BridgedScoreResult } from "./scoring-bridge.js";
|
|
28
|
+
export { ConfigNotFoundError, loadConfigFile, tryLoadConfigFile, } from "./config-loader.js";
|
|
29
|
+
export type { ConfigLoadResult } from "./config-loader.js";
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Config compiler — the heart of the new architecture.
|
|
3
|
+
*
|
|
4
|
+
* Converts task definitions from any source into a TaskGraph IR,
|
|
5
|
+
* then compiles the graph into Promptfoo YAML configuration.
|
|
6
|
+
*
|
|
7
|
+
* This module coexists with the existing `generate-configs.ts` path.
|
|
8
|
+
* Phase 7 will migrate callers to use the compiler exclusively.
|
|
9
|
+
*
|
|
10
|
+
* @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
|
|
11
|
+
*/
|
|
12
|
+
// TaskGraph builder
|
|
13
|
+
export { buildTaskGraph, detectCycle, } from "./task-graph-builder.js";
|
|
14
|
+
// Promptfoo compiler
|
|
15
|
+
export { compileToPromptfoo, } from "./promptfoo-compiler.js";
|
|
16
|
+
// Assertion mapper
|
|
17
|
+
export { isAssertionCompatibleWithMode, isValidAssertionType, mapAssertions, } from "./assertion-mapper.js";
|
|
18
|
+
// Fixture resolver
|
|
19
|
+
export { resolveTaskFixtures, } from "./fixture-resolver.js";
|
|
20
|
+
// Variable resolver
|
|
21
|
+
export { createEnvelope, resolveVariables, } from "./variable-resolver.js";
|
|
22
|
+
// Mode handlers
|
|
23
|
+
export { buildMCPAssertions, compileAgentHarnessTask, compileLiteracyTask, compileKnowledgeProbeTask, compileMCPTask, validateAgentHarnessTask, validateLiteracyTask, validateKnowledgeProbeTask, validateMCPTask, } from "./mode-handlers/index.js";
|
|
24
|
+
// Sandbox infrastructure
|
|
25
|
+
export { createSandboxStrategy, DockerSandboxStrategy, GitWorktreeSandboxStrategy, selectSandboxStrategy, TempDirSandboxStrategy, } from "./sandbox/index.js";
|
|
26
|
+
// Fixture provisioning
|
|
27
|
+
export { provisionFixtures, } from "./sandbox/fixture-provisioner.js";
|
|
28
|
+
// Provider assembler — builds per-variant provider arrays from models config
|
|
29
|
+
export { loadModelsAndProviders, } from "./provider-assembler.js";
|
|
30
|
+
// Compiler-to-YAML — serializes compiled config to Promptfoo YAML files
|
|
31
|
+
export { writeCompiledLiteracyConfigs, } from "./compiler-to-yaml.js";
|
|
32
|
+
// Literacy bridge — LiteracyTaskDefinition → new compiler
|
|
33
|
+
export { compileLiteracyTasks, compareCompilerOutputs, } from "./literacy-bridge.js";
|
|
34
|
+
// Telemetry — observability & tracing
|
|
35
|
+
export { checkBudget, classifyToolCall, classifyToolCalls, collectTrace, computeCost, createRedactionConfig, DEFAULT_REDACTION_RULES, estimateRunCost, extractTraceSummary, LocalTraceStore, lookupPricing, mergeTraces, redactTrace, } from "./telemetry/index.js";
|
|
36
|
+
// Presets — bundled evaluation capabilities
|
|
37
|
+
export { registerSanityLiteracyPreset, sanityLiteracyPreset, } from "./presets/index.js";
|
|
38
|
+
// Field stripping for non-deterministic outputs
|
|
39
|
+
export { buildIgnoreFieldsWrapper, compareWithIgnoredFields, stripFields, } from "./ignore-fields.js";
|
|
40
|
+
// Hash utility
|
|
41
|
+
export { simpleHash } from "./hash.js";
|
|
42
|
+
// Scoring bridge — 4-tier engine integration
|
|
43
|
+
export { scoreTestGroup } from "./scoring-bridge.js";
|
|
44
|
+
// Unified config loader
|
|
45
|
+
export { ConfigNotFoundError, loadConfigFile, tryLoadConfigFile, } from "./config-loader.js";
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Literacy bridge — maps LiteracyTaskDefinition to the new compiler pipeline.
|
|
3
|
+
*
|
|
4
|
+
* This module bridges the task loading system (TaskSource →
|
|
5
|
+
* GeneralizedTaskDefinition) and the compiler (TaskGraph →
|
|
6
|
+
* PromptfooCompiler). It allows literacy tasks to run through
|
|
7
|
+
* the compiler.
|
|
8
|
+
*
|
|
9
|
+
* Pipeline: LiteracyTaskDefinition[] → TaskGraphBuilder → topological order →
|
|
10
|
+
* LiteracyModeHandler (per task) → LiteracyBridgeResult
|
|
11
|
+
*
|
|
12
|
+
* Key behaviors:
|
|
13
|
+
* - Tasks without explicit mode get mode: "literacy" (backward compat)
|
|
14
|
+
* - LiteracyTaskDefinition fields map to compiler input fields
|
|
15
|
+
* - Rubric config is loaded from config/rubrics
|
|
16
|
+
* - Prompts from config/prompts are integrated
|
|
17
|
+
* - TaskGraphBuilder validates the DAG, deduplicates, and orders tasks
|
|
18
|
+
*
|
|
19
|
+
* @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
|
|
20
|
+
*/
|
|
21
|
+
import type { LiteracyTaskDefinition } from "../../_vendor/ailf-core/index.d.ts";
|
|
22
|
+
import { type LiteracyCompileResult } from "./mode-handlers/literacy-handler.js";
|
|
23
|
+
import { type LiteracyEvalSubMode } from "../normalize-mode.js";
|
|
24
|
+
/**
 * Options accepted when compiling a batch of literacy tasks through the
 * new compiler.
 */
export interface LiteracyBridgeOptions {
    /** Root directory of the eval package. */
    rootDir: string;
    /** Literacy eval sub-mode to compile for (optional). */
    evalMode?: LiteracyEvalSubMode;
    /** Provider ID of the grader (optional). */
    graderProvider?: string;
    /** Model providers (optional). */
    models?: Array<{
        id: string;
        label: string;
        config?: Record<string, unknown>;
    }>;
}
|
|
39
|
+
/**
 * Aggregate outcome of compiling a batch of literacy tasks.
 */
export interface LiteracyBridgeResult {
    /** One compilation result per task, paired with the task's ID. */
    tasks: Array<{
        taskId: string;
        result: LiteracyCompileResult;
    }>;
    /** Warnings collected across all tasks. */
    warnings: string[];
    /** Total number of test cases generated. */
    totalTests: number;
}
|
|
51
|
+
/**
|
|
52
|
+
* Compile an array of LiteracyTaskDefinition through the literacy handler.
|
|
53
|
+
*
|
|
54
|
+
* This is the primary entry point for Phase 7 migration. It takes
|
|
55
|
+
* LiteracyTaskDefinition[] and routes them through the compiler pipeline:
|
|
56
|
+
*
|
|
57
|
+
* LiteracyTaskDefinition[] → TaskGraphBuilder → topological order →
|
|
58
|
+
* LiteracyModeHandler (per task) → LiteracyBridgeResult
|
|
59
|
+
*
|
|
60
|
+
* The TaskGraphBuilder provides:
|
|
61
|
+
* - Duplicate task ID detection (warns on collisions)
|
|
62
|
+
* - Status-based filtering (archived/paused/draft)
|
|
63
|
+
* - Dependency edge discovery and DAG cycle validation
|
|
64
|
+
* - Topological priority assignment (tasks with deps run in order)
|
|
65
|
+
*
|
|
66
|
+
* Note: The incoming tasks are typically pre-filtered by the pipeline
|
|
67
|
+
* step (area/tag/taskId filters + release auto-scope). The graph
|
|
68
|
+
* builder's own filtering is intentionally invoked WITHOUT a filter
|
|
69
|
+
* argument to avoid double-filtering — it still applies status-based
|
|
70
|
+
* rules (e.g., rejecting archived tasks that slipped through).
|
|
71
|
+
*/
|
|
72
|
+
export declare function compileLiteracyTasks(tasks: LiteracyTaskDefinition[], options: LiteracyBridgeOptions): LiteracyBridgeResult;
|
|
73
|
+
/**
|
|
74
|
+
* Compare old-style expanded entries with new-style compiled entries.
|
|
75
|
+
*
|
|
76
|
+
* This is the parallel comparison gate (task 7b). For each task, it
|
|
77
|
+
* checks that the new compiler produces structurally equivalent output
|
|
78
|
+
* to the legacy expand-tasks path.
|
|
79
|
+
*/
|
|
80
|
+
export declare function compareCompilerOutputs(legacyEntries: LegacyEntry[], newResult: LiteracyBridgeResult): ComparisonResult;
|
|
81
|
+
/**
 * Minimal shape of a legacy entry (as produced by expand-tasks).
 * Every member is optional.
 */
export interface LegacyEntry {
    /** Free-text description of the entry. */
    description?: string;
    /** Variables attached to the entry. */
    vars?: Record<string, unknown>;
    /** Assertions attached to the entry. */
    assert?: Array<{
        type: string;
        value?: unknown;
    }>;
    /** Prompts attached to the entry. */
    prompts?: string[];
}
|
|
91
|
+
/**
 * A single difference found between the legacy output and the new
 * compiler output for one task.
 */
export interface ComparisonDiscrepancy {
    /** ID of the task the difference belongs to. */
    taskId: string;
    /** Name of the compared property that differs. */
    field: string;
    /** Value observed on the legacy side. */
    legacy: unknown;
    /** Value observed on the new-compiler side. */
    new_: unknown;
    /** Human-readable description of the difference. */
    message: string;
}
|
|
98
|
+
/**
 * Overall result of comparing legacy entries with new compiler output.
 */
export interface ComparisonResult {
    /** Whether the comparison passed. */
    passed: boolean;
    /** Every difference that was recorded. */
    discrepancies: ComparisonDiscrepancy[];
    /** One-line textual summary of the comparison. */
    summary: string;
}
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Literacy bridge — maps LiteracyTaskDefinition to the new compiler pipeline.
|
|
3
|
+
*
|
|
4
|
+
* This module bridges the task loading system (TaskSource →
|
|
5
|
+
* GeneralizedTaskDefinition) and the compiler (TaskGraph →
|
|
6
|
+
* PromptfooCompiler). It allows literacy tasks to run through
|
|
7
|
+
* the compiler.
|
|
8
|
+
*
|
|
9
|
+
* Pipeline: LiteracyTaskDefinition[] → TaskGraphBuilder → topological order →
|
|
10
|
+
* LiteracyModeHandler (per task) → LiteracyBridgeResult
|
|
11
|
+
*
|
|
12
|
+
* Key behaviors:
|
|
13
|
+
* - Tasks without explicit mode get mode: "literacy" (backward compat)
|
|
14
|
+
* - LiteracyTaskDefinition fields map to compiler input fields
|
|
15
|
+
* - Rubric config is loaded from config/rubrics
|
|
16
|
+
* - Prompts from config/prompts are integrated
|
|
17
|
+
* - TaskGraphBuilder validates the DAG, deduplicates, and orders tasks
|
|
18
|
+
*
|
|
19
|
+
* @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
|
|
20
|
+
*/
|
|
21
|
+
import { compileLiteracyTask, } from "./mode-handlers/literacy-handler.js";
|
|
22
|
+
import { tryLoadConfigFile } from "./config-loader.js";
|
|
23
|
+
import { buildTaskGraph } from "./task-graph-builder.js";
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
// Public API
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
/**
 * Run a list of LiteracyTaskDefinition objects through the compiler pipeline.
 *
 * Steps, in order:
 *   1. Load the rubric configuration for `options.rootDir`.
 *   2. Build a task graph from `tasks`. No filter is supplied because the
 *      caller is expected to have filtered already; whatever the builder
 *      still reports in `filteredOut` is surfaced as a warning.
 *   3. Visit the graph nodes in ascending `priority` and compile the
 *      matching task definition for each node with the literacy handler.
 *
 * Warnings from the graph builder and from every compiled task are gathered
 * into one list, and the number of generated tests is summed.
 *
 * @param tasks - Task definitions to compile.
 * @param options - Bridge options (`rootDir`, `evalMode`, `graderProvider`, `models`).
 * @returns `{ tasks, warnings, totalTests }`; `tasks` is empty and
 *   `totalTests` is 0 when the builder yields no graph.
 */
export function compileLiteracyTasks(tasks, options) {
    const rubricConfig = loadRubricConfig(options.rootDir);
    // Tasks arrive pre-filtered, so the graph is built without a filter.
    const built = buildTaskGraph({ tasks });
    const warnings = [...built.warnings];
    const droppedCount = built.filteredOut.length;
    if (droppedCount > 0) {
        warnings.push(`TaskGraphBuilder filtered out ${droppedCount} task(s) ` +
            `by status: ${built.filteredOut.join(", ")}`);
    }
    // Nothing survived graph construction: report warnings only.
    if (!built.graph) {
        return { tasks: [], warnings, totalTests: 0 };
    }
    // Graph nodes carry only the task ID, so index the definitions by ID.
    const definitionsById = new Map(tasks.map((definition) => [definition.id, definition]));
    const nodesByPriority = Array.from(built.graph.nodes.values());
    nodesByPriority.sort((left, right) => left.priority - right.priority);
    const compileOptions = {
        graderProvider: options.graderProvider,
        rootDir: options.rootDir,
        evalMode: options.evalMode,
        models: options.models,
        rubricConfig,
    };
    const compiled = [];
    let totalTests = 0;
    for (const { taskId } of nodesByPriority) {
        const definition = definitionsById.get(taskId);
        if (definition) {
            const result = compileLiteracyTask(definition, compileOptions);
            compiled.push({ taskId: definition.id, result });
            warnings.push(...result.warnings);
            totalTests += result.tests.length;
        }
        else {
            warnings.push(`TaskGraphBuilder produced node "${taskId}" with no matching LiteracyTaskDefinition — skipped`);
        }
    }
    return { tasks: compiled, warnings, totalTests };
}
|
|
90
|
+
/**
 * Compare old-style expanded entries with new-style compiled entries.
 *
 * This is the parallel comparison gate (task 7b). For each compiled task it
 * checks two things against the legacy entries:
 *   - testCount: number of compiled tests vs. number of legacy "(gold)" plus
 *     "(baseline)" entries (skipped when there are no such legacy entries);
 *   - assertionCount: assertion count of every compiled "(gold)" test vs. the
 *     first legacy "(gold)" entry that carries an `assert` array.
 *
 * Legacy entries are matched to a task by looking for the task ID inside the
 * entry description. Only when no legacy entry names the task does the
 * comparison fall back to all legacy entries, so legacy data whose
 * descriptions carry no task IDs is still checked.
 *
 * @param legacyEntries - Entries produced by the legacy path.
 * @param newResult - Result produced by the new compiler.
 * @returns `{ passed, discrepancies, summary }`; `passed` is true when no
 *   discrepancy was recorded.
 */
export function compareCompilerOutputs(legacyEntries, newResult) {
    const mentions = (entry, text) => entry.description?.includes(text) === true;
    const discrepancies = [];
    for (const { taskId, result } of newResult.tasks) {
        // Scope the comparison to entries that name this task. Matching every
        // "(gold)"/"(baseline)" entry regardless of task would compare each
        // task against the combined entries of all tasks.
        const namedForTask = legacyEntries.filter((e) => mentions(e, taskId));
        const legacyForTask = namedForTask.length > 0 ? namedForTask : legacyEntries;
        const legacyGold = legacyForTask.filter((e) => mentions(e, "(gold)"));
        const legacyBaseline = legacyForTask.filter((e) => mentions(e, "(baseline)"));
        // Check test count matches
        const newTestCount = result.tests.length;
        const legacyCount = legacyGold.length + legacyBaseline.length;
        if (legacyCount > 0 && newTestCount !== legacyCount) {
            discrepancies.push({
                taskId,
                field: "testCount",
                legacy: legacyCount,
                new_: newTestCount,
                message: `Test count mismatch: legacy=${legacyCount}, new=${newTestCount}`,
            });
        }
        // Check assertion count on gold entries
        const legacyGoldAsserts = legacyGold[0]?.assert;
        if (!legacyGoldAsserts) {
            continue;
        }
        const legacyAssertCount = legacyGoldAsserts.length;
        for (const test of result.tests) {
            if (!test.description.includes("(gold)")) {
                continue;
            }
            const assertCount = test.assert?.length ?? 0;
            if (assertCount !== legacyAssertCount) {
                discrepancies.push({
                    taskId,
                    field: "assertionCount",
                    legacy: legacyAssertCount,
                    new_: assertCount,
                    message: `Gold assertion count mismatch: legacy=${legacyAssertCount}, new=${assertCount}`,
                });
            }
        }
    }
    return {
        passed: discrepancies.length === 0,
        discrepancies,
        summary: discrepancies.length === 0
            ? "All tasks produce structurally equivalent output"
            : `${discrepancies.length} discrepancy(ies) found`,
    };
}
|
|
146
|
+
// ---------------------------------------------------------------------------
|
|
147
|
+
// Rubric config loading
|
|
148
|
+
// ---------------------------------------------------------------------------
|
|
149
|
+
/**
 * Load the "rubrics" config file for `rootDir` and normalise its templates.
 *
 * Each entry under `templates` is reduced to `header` (string, "" when
 * absent), `scale` ([] when absent) and the optional string fields
 * `dimension` and `criteria_label` (undefined when falsy).
 *
 * Loading is best-effort: a missing file, a missing `templates` section, or
 * any error thrown while normalising yields `undefined` instead of throwing.
 *
 * @param rootDir - Directory handed to the config loader.
 * @returns `{ templates }` or `undefined`.
 */
function loadRubricConfig(rootDir) {
    const loaded = tryLoadConfigFile("rubrics", rootDir);
    if (!loaded) {
        return undefined;
    }
    try {
        const rawTemplates = loaded.data?.templates;
        if (!rawTemplates) {
            return undefined;
        }
        const templates = {};
        Object.entries(rawTemplates).forEach(([name, raw]) => {
            const header = String(raw.header ?? "");
            const scale = raw.scale ?? [];
            const dimension = raw.dimension ? String(raw.dimension) : undefined;
            const criteriaLabel = raw.criteria_label ? String(raw.criteria_label) : undefined;
            templates[name] = {
                header,
                scale,
                dimension,
                criteria_label: criteriaLabel,
            };
        });
        return { templates };
    }
    catch {
        // A malformed template entry (e.g. null) is treated as "no rubric config".
        return undefined;
    }
}
|