@sanity/ailf 0.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/config/features.ts +23 -0
- package/config/models.ts +95 -0
- package/config/prompts.ts +16 -0
- package/config/rubrics.ts +225 -0
- package/config/schedules.ts +47 -0
- package/config/sinks.ts +37 -0
- package/config/sources.ts +21 -0
- package/config/thresholds.ts +61 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
- package/dist/_vendor/ailf-core/config-helpers.js +170 -0
- package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
- package/dist/_vendor/ailf-core/env-helper.js +45 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/examples/index.js +25 -0
- package/dist/_vendor/ailf-core/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/index.js +5 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
- package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
- package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
- package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
- package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/services/index.js +2 -1
- package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
- package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
- package/dist/_vendor/ailf-core/services/scoring.js +25 -15
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
- package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
- package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
- package/dist/_vendor/ailf-core/types/index.js +8 -1
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
- package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
- package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
- package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
- package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
- package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
- package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
- package/dist/_vendor/ailf-core/types/trace.js +18 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
- package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
- package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
- package/dist/_vendor/ailf-shared/index.d.ts +0 -1
- package/dist/_vendor/ailf-shared/index.js +0 -1
- package/dist/adapters/api-client/build-request.js +14 -13
- package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
- package/dist/adapters/config-sources/file-config-adapter.js +39 -12
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
- package/dist/adapters/config-sources/ts-config-loader.js +141 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
- package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
- package/dist/adapters/task-sources/index.d.ts +3 -2
- package/dist/adapters/task-sources/index.js +3 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
- package/dist/adapters/task-sources/repo-schemas.js +227 -19
- package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
- package/dist/adapters/task-sources/repo-task-source.js +92 -80
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
- package/dist/adapters/task-sources/task-file-loader.js +83 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
- package/dist/adapters/task-sources/yaml-task-source.js +19 -16
- package/dist/cli.js +0 -2
- package/dist/commands/baseline.js +4 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/coverage-audit.js +9 -1
- package/dist/commands/explain-handler.js +25 -23
- package/dist/commands/fetch-docs.js +3 -2
- package/dist/commands/generate-configs.js +1 -1
- package/dist/commands/init.d.ts +6 -4
- package/dist/commands/init.js +302 -23
- package/dist/commands/interactive.js +11 -7
- package/dist/commands/pipeline-action.d.ts +2 -0
- package/dist/commands/pipeline-action.js +16 -6
- package/dist/commands/pipeline.d.ts +1 -0
- package/dist/commands/pipeline.js +4 -2
- package/dist/commands/pr-comment.js +1 -1
- package/dist/commands/publish.js +2 -2
- package/dist/commands/readiness-report.js +13 -6
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +13 -1
- package/dist/composition-root.js +99 -4
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +28 -8
- package/dist/orchestration/steps/calculate-scores-step.js +24 -11
- package/dist/orchestration/steps/fetch-docs-step.js +8 -7
- package/dist/orchestration/steps/gap-analysis-step.js +8 -7
- package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
- package/dist/orchestration/steps/generate-configs-step.js +261 -51
- package/dist/orchestration/steps/grader-consistency-step.js +7 -4
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/readiness-step.js +5 -6
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
- package/dist/orchestration/steps/run-eval-step.js +8 -7
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/cache.js +36 -8
- package/dist/pipeline/calculate-scores.d.ts +2 -4
- package/dist/pipeline/calculate-scores.js +43 -113
- package/dist/pipeline/checks.js +2 -2
- package/dist/pipeline/compare.js +8 -8
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
- package/dist/pipeline/compiler/assertion-mapper.js +175 -0
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
- package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
- package/dist/pipeline/compiler/config-loader.d.ts +56 -0
- package/dist/pipeline/compiler/config-loader.js +111 -0
- package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
- package/dist/pipeline/compiler/fixture-resolver.js +113 -0
- package/dist/pipeline/compiler/hash.d.ts +11 -0
- package/dist/pipeline/compiler/hash.js +18 -0
- package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
- package/dist/pipeline/compiler/ignore-fields.js +113 -0
- package/dist/pipeline/compiler/index.d.ts +29 -0
- package/dist/pipeline/compiler/index.js +45 -0
- package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
- package/dist/pipeline/compiler/literacy-bridge.js +172 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
- package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/index.d.ts +9 -0
- package/dist/pipeline/compiler/presets/index.js +8 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
- package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
- package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
- package/dist/pipeline/compiler/provider-assembler.js +137 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
- package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
- package/dist/pipeline/compiler/sandbox/index.js +11 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
- package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
- package/dist/pipeline/compiler/scoring-bridge.js +114 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
- package/dist/pipeline/compiler/task-graph-builder.js +291 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
- package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
- package/dist/pipeline/compiler/telemetry/index.js +19 -0
- package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
- package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
- package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
- package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
- package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
- package/dist/pipeline/compiler/variable-resolver.js +115 -0
- package/dist/pipeline/coverage-audit.d.ts +15 -5
- package/dist/pipeline/coverage-audit.js +41 -22
- package/dist/pipeline/eval-constants.d.ts +16 -6
- package/dist/pipeline/eval-constants.js +25 -4
- package/dist/pipeline/eval-fingerprint.d.ts +2 -2
- package/dist/pipeline/eval-fingerprint.js +8 -9
- package/dist/pipeline/expand-tasks.d.ts +19 -10
- package/dist/pipeline/expand-tasks.js +34 -28
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +2 -2
- package/dist/pipeline/generate-configs.d.ts +22 -4
- package/dist/pipeline/generate-configs.js +53 -24
- package/dist/pipeline/grader-api.d.ts +3 -3
- package/dist/pipeline/grader-api.js +5 -12
- package/dist/pipeline/grader-compare-runner.js +20 -27
- package/dist/pipeline/grader-comparison.d.ts +4 -8
- package/dist/pipeline/grader-comparison.js +11 -17
- package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
- package/dist/pipeline/grader-consistency-runner.js +16 -20
- package/dist/pipeline/grader-consistency.d.ts +6 -10
- package/dist/pipeline/grader-consistency.js +13 -32
- package/dist/pipeline/grader-sensitivity-runner.js +7 -5
- package/dist/pipeline/grader-sensitivity.d.ts +2 -6
- package/dist/pipeline/grader-sensitivity.js +10 -10
- package/dist/pipeline/grader-validate-runner.js +7 -5
- package/dist/pipeline/grader-validation.d.ts +2 -6
- package/dist/pipeline/grader-validation.js +14 -22
- package/dist/pipeline/map-request-to-config.js +7 -1
- package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
- package/dist/pipeline/mirror-repo-tasks.js +22 -21
- package/dist/pipeline/normalize-mode.d.ts +49 -0
- package/dist/pipeline/normalize-mode.js +64 -0
- package/dist/pipeline/plan.d.ts +5 -2
- package/dist/pipeline/plan.js +134 -78
- package/dist/pipeline/pr-comment.js +2 -0
- package/dist/pipeline/profile-resolution.d.ts +22 -14
- package/dist/pipeline/profile-resolution.js +41 -19
- package/dist/pipeline/provenance.d.ts +2 -2
- package/dist/pipeline/provenance.js +12 -17
- package/dist/pipeline/release-report.js +4 -4
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/rubric-loader.d.ts +20 -0
- package/dist/pipeline/rubric-loader.js +37 -0
- package/dist/pipeline/validate.d.ts +4 -4
- package/dist/pipeline/validate.js +64 -53
- package/dist/schedules/loader.js +18 -8
- package/dist/scripts/migrate-task-mode.d.ts +24 -0
- package/dist/scripts/migrate-task-mode.js +85 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +15 -15
- package/dist/sinks/loader.js +5 -7
- package/dist/sources.d.ts +7 -7
- package/dist/sources.js +22 -24
- package/dist/webhook/dispatch.js +2 -1
- package/package.json +15 -4
- package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
- package/tasks/literacy/frameworks.task.ts +128 -0
- package/tasks/literacy/functions.task.ts +69 -0
- package/tasks/literacy/groq.task.ts +258 -0
- package/tasks/literacy/nextjs-live.task.ts +75 -0
- package/tasks/literacy/studio-setup.task.ts +131 -0
- package/tasks/literacy/visual-editing.task.ts +146 -0
- package/config/features.yaml +0 -116
- package/config/models.yaml +0 -116
- package/config/prompts.yaml +0 -75
- package/config/rubrics.yaml +0 -81
- package/config/schedules.yaml +0 -43
- package/config/sinks.yaml +0 -54
- package/config/sources.yaml +0 -51
- package/config/thresholds.yaml +0 -49
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
/**
 * sanity-literacy preset — Sanity-specific domain configuration for literacy evaluation.
 *
 * This is a domain preset that targets the `literacy` mode base. It provides
 * Sanity-specific configuration:
 * - Sanity doc source definitions (production, branch, local)
 * - Product feature registry for coverage auditing
 * - DocFetcher factory (SanityDocFetcher)
 * - Sanity fixture resolver (sanity:// scheme)
 *
 * Evaluation methodology (rubrics, scoring, prompts) is inherited from the
 * `literacy` mode base — see mode-bases/literacy.ts.
 *
 * @see docs/exec-plans/architecture-overhaul/phase-8-scoring-storage-presets.md
 */
|
|
16
|
+
import { env } from "../../../_vendor/ailf-core/index.js";
|
|
17
|
+
import { SanityDocFetcher } from "../../../adapters/doc-fetchers/index.js";
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Factory
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
/**
 * Build the `sanity-literacy` preset object.
 *
 * The returned preset targets the `literacy` mode and bundles the
 * Sanity-specific pieces: a `sanity://` fixture-resolver entry, a doc-fetcher
 * factory, three doc source definitions (production, branch, local) and the
 * feature registry used for coverage auditing.
 *
 * @param options - Optional settings. `options.rootDir` is passed to
 *   `SanityDocFetcher`; when it is null or undefined, `process.cwd()` is used.
 * @returns The preset object with `name`, `manifest`, `mode`,
 *   `fixtureResolvers`, `docFetcher`, `sourceDefs` and `featureDefs`.
 */
export function createSanityLiteracyPreset(options) {
    const rootDir = options?.rootDir ?? process.cwd();

    // All sources share the same project/dataset lookup. env() is still
    // called once per source, at preset creation time, after the caller has
    // evaluated that source's baseUrl.
    const sourceDef = (name, baseUrl) => ({
        name,
        baseUrl,
        projectId: env("SANITY_PROJECT_ID", "3do82whm"),
        dataset: env("SANITY_DATASET", "next"),
    });

    // Features that already have evaluation tasks carry an `area`.
    const covered = (id, name, sections, area, priority) => ({
        id,
        name,
        sections,
        status: "covered",
        area,
        priority,
    });

    // Features without evaluation tasks yet have no `area` key at all.
    const uncovered = (id, name, sections, priority) => ({
        id,
        name,
        sections,
        status: "uncovered",
        priority,
    });

    const description = [
        "Documentation literacy evaluation for Sanity.io — measures how ",
        "effectively documentation enables AI coding tools to implement ",
        "features correctly.",
    ].join("");

    return {
        name: "sanity-literacy",
        manifest: {
            name: "sanity-literacy",
            version: "1.0.0",
            description,
            pluginApiVersion: 1,
        },
        // Mode: this preset targets the literacy mode base.
        mode: "literacy",
        // Fixture resolvers: one entry for the sanity:// scheme.
        fixtureResolvers: [
            { scheme: "sanity://", handlerModule: "./fixture-resolver.js" },
        ],
        // Doc fetcher factory: the closure captures rootDir, so a fetcher
        // can be created later without passing rootDir again.
        docFetcher: () => new SanityDocFetcher(rootDir),
        // Source definitions.
        sourceDefs: [
            sourceDef("production", "https://www.sanity.io/docs"),
            sourceDef("branch", env("DOC_BASE_URL", "")),
            sourceDef("local", `http://localhost:${env("DOC_LOCAL_PORT", "3001")}/docs`),
        ],
        // Feature registry for coverage auditing.
        featureDefs: {
            features: [
                // Currently covered (have evaluation tasks)
                covered("groq", "GROQ Query Language", ["content-lake"], "groq", "critical"),
                covered("visual-editing", "Visual Editing", ["visual-editing"], "visual-editing", "critical"),
                covered("nextjs-live", "Next.js Live Preview", ["visual-editing"], "nextjs-live", "high"),
                covered("functions", "Sanity Functions", ["compute-and-ai"], "functions", "high"),
                covered("studio-setup", "Studio Configuration", ["studio"], "studio-setup", "high"),
                covered("frameworks", "Framework Integration", ["developer-guides"], "frameworks", "high"),
                // Uncovered (no evaluation tasks yet)
                uncovered("portable-text", "Portable Text", ["content-lake", "studio"], "high"),
                uncovered("image-assets", "Image & Asset Handling", ["content-lake", "apis-and-sdks"], "high"),
                uncovered("mutations", "Mutations & Transactions", ["content-lake", "apis-and-sdks"], "high"),
                uncovered("schemas", "Schema Types & Validation", ["studio", "content-lake"], "medium"),
                uncovered("authentication", "Authentication & Access Control", ["apis-and-sdks"], "medium"),
                uncovered("webhooks", "Webhooks", ["content-lake"], "medium"),
                uncovered("realtime", "Real-time Listeners", ["apis-and-sdks"], "low"),
                uncovered("ai-assist", "AI Assist", ["compute-and-ai", "studio"], "medium"),
            ],
        },
    };
}
|
|
192
|
+
// ---------------------------------------------------------------------------
|
|
193
|
+
// Convenience constant — preset with default options for backward compat
|
|
194
|
+
// ---------------------------------------------------------------------------
|
|
195
|
+
/**
 * Ready-made preset instance created with no options, so the doc fetcher's
 * rootDir falls back to `process.cwd()`.
 *
 * The instance is built once, when this module is evaluated. Call
 * `createSanityLiteracyPreset()` yourself when you need a different rootDir;
 * this constant is kept for code that imports `sanityLiteracyPreset` directly.
 */
export const sanityLiteracyPreset = createSanityLiteracyPreset();
|
|
203
|
+
/**
 * Add the default `sanityLiteracyPreset` instance to a plugin registry.
 *
 * @param registry - Object exposing `registerPreset(preset)`; it is called
 *   exactly once with the module-level `sanityLiteracyPreset`.
 */
export function registerSanityLiteracyPreset(registry) {
    const preset = sanityLiteracyPreset;
    registry.registerPreset(preset);
}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PromptfooCompiler — compiles a TaskGraph into Promptfoo YAML configuration.
|
|
3
|
+
*
|
|
4
|
+
* The compiler is the core of the new architecture. It takes a validated
|
|
5
|
+
* TaskGraph and produces a Promptfoo config that can be executed via
|
|
6
|
+
* `promptfoo eval`.
|
|
7
|
+
*
|
|
8
|
+
* Compilation pipeline:
|
|
9
|
+
* TaskGraph → resolve fixtures → resolve variables → map assertions
|
|
10
|
+
* → assemble prompts → assemble providers → emit YAML
|
|
11
|
+
*
|
|
12
|
+
* This module exists alongside `generate-configs.ts` — it does NOT replace
|
|
13
|
+
* the existing codegen path. Phase 7 will swap callers over to the compiler.
|
|
14
|
+
*
|
|
15
|
+
* @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
|
|
16
|
+
*/
|
|
17
|
+
import type { ModeHandler, ModelsConfig, TaskGraph } from "../../_vendor/ailf-core/index.d.ts";
|
|
18
|
+
import type { EvalMode } from "../../_vendor/ailf-shared/index.d.ts";
|
|
19
|
+
import { type PromptfooAssertion } from "./assertion-mapper.js";
|
|
20
|
+
/**
 * One entry of the `tests` array in a compiled Promptfoo config.
 */
export interface PromptfooTestCase {
    /** Label identifying the test case. */
    description: string;
    /** Variables supplied to the prompt templates for this test case. */
    vars: Record<string, unknown>;
    /** Assertions checked against the output; absent when there are none. */
    assert?: Array<PromptfooAssertion>;
    /** Prompt filter — ids of the prompts this test case runs against. */
    prompts?: Array<string>;
}
|
|
28
|
+
/**
 * The Promptfoo configuration object produced by the compiler.
 */
export interface CompiledPromptfooConfig {
    /** Description header for the whole evaluation. */
    description: string;
    /** Prompt entries (id, label, raw template). */
    prompts: Array<PromptfooPrompt>;
    /** Provider entries the evaluation runs against. */
    providers: Array<PromptfooProvider>;
    /** Test cases to execute. */
    tests: Array<PromptfooTestCase>;
    /** Defaults applied to every test case. */
    defaultTest?: { options?: Record<string, unknown> };
    /** Where results should be written. */
    outputPath?: string;
}
|
|
45
|
+
/**
 * A single prompt entry in a Promptfoo config.
 */
export interface PromptfooPrompt {
    /** Identifier used to reference the prompt (e.g. from a test case's prompt filter). */
    id: string;
    /** Human-readable name of the prompt. */
    label: string;
    /** Raw template text, including `{{variable}}` placeholders. */
    raw: string;
}
|
|
51
|
+
/**
 * A single provider entry in a Promptfoo config.
 */
export interface PromptfooProvider {
    /** Provider identifier. */
    id: string;
    /** Optional display name for the provider. */
    label?: string;
    /** Optional provider-specific settings. */
    config?: Record<string, unknown>;
}
|
|
57
|
+
/**
 * Inputs that control how `compileToPromptfoo` builds its config.
 */
export interface PromptfooCompilerOptions {
    /** The evaluation mode being compiled. */
    mode: EvalMode;
    /** Model registry from which the provider list is built. */
    models: ModelsConfig;
    /** Root directory used when resolving fixtures. */
    rootDir: string;
    /** Output path recorded in the generated config. */
    outputPath?: string;
    /** Provider ID of the grader model. */
    graderProvider?: string;
    /** Explicit prompt templates; consulted after `handler`, before the built-in defaults. */
    prompts?: Array<PromptfooPrompt>;
    /**
     * Mode handler. When supplied, the compiler asks it for prompts via
     * `handler.getPrompts()` and prefers those templates; only if it yields
     * none does the compiler fall back to `prompts` and then to the
     * built-in defaults.
     */
    handler?: ModeHandler;
}
|
|
78
|
+
/**
 * What `compileToPromptfoo` hands back to its caller.
 */
export interface CompilationResult {
    /** The compiled Promptfoo config. */
    config: CompiledPromptfooConfig;
    /** Warnings collected while compiling. */
    warnings: Array<string>;
    /** How many tasks (graph nodes) were compiled. */
    taskCount: number;
    /** How many test cases were generated in total. */
    testCaseCount: number;
}
|
|
89
|
+
/**
 * Turn a TaskGraph into a Promptfoo configuration.
 *
 * Each graph node has its fixtures and variables resolved and its
 * assertions mapped; the resulting test cases are combined with the
 * prompt and provider lists into the final config.
 *
 * @param graph - The task graph to compile.
 * @param options - Compiler options (mode, model registry, rootDir, …).
 * @returns The compiled config together with warnings and counts.
 */
export declare function compileToPromptfoo(
    graph: TaskGraph,
    options: PromptfooCompilerOptions,
): CompilationResult;
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PromptfooCompiler — compiles a TaskGraph into Promptfoo YAML configuration.
|
|
3
|
+
*
|
|
4
|
+
* The compiler is the core of the new architecture. It takes a validated
|
|
5
|
+
* TaskGraph and produces a Promptfoo config that can be executed via
|
|
6
|
+
* `promptfoo eval`.
|
|
7
|
+
*
|
|
8
|
+
* Compilation pipeline:
|
|
9
|
+
* TaskGraph → resolve fixtures → resolve variables → map assertions
|
|
10
|
+
* → assemble prompts → assemble providers → emit YAML
|
|
11
|
+
*
|
|
12
|
+
* This module exists alongside `generate-configs.ts` — it does NOT replace
|
|
13
|
+
* the existing codegen path. Phase 7 will swap callers over to the compiler.
|
|
14
|
+
*
|
|
15
|
+
* @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
|
|
16
|
+
*/
|
|
17
|
+
import { mapAssertions } from "./assertion-mapper.js";
|
|
18
|
+
import { resolveTaskFixtures } from "./fixture-resolver.js";
|
|
19
|
+
import { LiteracyVariant } from "../normalize-mode.js";
|
|
20
|
+
import { resolveVariables } from "./variable-resolver.js";
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Public API
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
/**
|
|
25
|
+
* Compile a TaskGraph into a Promptfoo configuration.
|
|
26
|
+
*
|
|
27
|
+
* Traverses the graph in topological order, resolves fixtures and
|
|
28
|
+
* variables for each node, maps assertions, and assembles the final
|
|
29
|
+
* Promptfoo config.
|
|
30
|
+
*/
|
|
31
|
+
export function compileToPromptfoo(graph, options) {
|
|
32
|
+
const warnings = [];
|
|
33
|
+
const tests = [];
|
|
34
|
+
// Sort nodes by priority (topological order)
|
|
35
|
+
const sortedNodes = [...graph.nodes.values()].sort((a, b) => a.priority - b.priority);
|
|
36
|
+
// Compile each node into test cases
|
|
37
|
+
for (const node of sortedNodes) {
|
|
38
|
+
const compiled = compileNode(node, graph, options, warnings);
|
|
39
|
+
tests.push(...compiled);
|
|
40
|
+
}
|
|
41
|
+
// Build providers list from model registry
|
|
42
|
+
const providers = buildProviders(options.models, options.mode);
|
|
43
|
+
// Prompt resolution: handler-owned → explicit override → built-in defaults
|
|
44
|
+
const prompts = resolvePrompts(options);
|
|
45
|
+
const config = {
|
|
46
|
+
description: `AILF evaluation — ${options.mode} mode (${tests.length} test cases)`,
|
|
47
|
+
prompts,
|
|
48
|
+
providers,
|
|
49
|
+
tests,
|
|
50
|
+
...(options.outputPath ? { outputPath: options.outputPath } : {}),
|
|
51
|
+
...(options.graderProvider
|
|
52
|
+
? {
|
|
53
|
+
defaultTest: {
|
|
54
|
+
options: {
|
|
55
|
+
provider: options.graderProvider,
|
|
56
|
+
},
|
|
57
|
+
},
|
|
58
|
+
}
|
|
59
|
+
: {}),
|
|
60
|
+
};
|
|
61
|
+
return {
|
|
62
|
+
config,
|
|
63
|
+
taskCount: sortedNodes.length,
|
|
64
|
+
testCaseCount: tests.length,
|
|
65
|
+
warnings,
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
// Node compilation
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
function compileNode(node, graph, options, warnings) {
|
|
72
|
+
// Resolve fixtures using a minimal GeneralizedTaskDefinition stub.
|
|
73
|
+
// The fixture resolver needs task.id and context.docs — we use the
|
|
74
|
+
// node's mode (propagated from the original task definition) to
|
|
75
|
+
// construct the correct variant stub.
|
|
76
|
+
const nodeMode = node.mode ?? options.mode ?? "literacy";
|
|
77
|
+
const fixtureResult = resolveTaskFixtures({
|
|
78
|
+
mode: nodeMode,
|
|
79
|
+
id: node.taskId,
|
|
80
|
+
title: node.taskId,
|
|
81
|
+
prompt: { text: node.resolvedPrompt },
|
|
82
|
+
}, node.resolvedVariables, { rootDir: options.rootDir });
|
|
83
|
+
warnings.push(...fixtureResult.warnings);
|
|
84
|
+
// Resolve dynamic variables
|
|
85
|
+
const varResult = resolveVariables(fixtureResult.updatedVars);
|
|
86
|
+
warnings.push(...varResult.warnings);
|
|
87
|
+
// Map assertions from the node's metadata
|
|
88
|
+
// For now, nodes carry assertion data in resolvedVariables.values.__assertions
|
|
89
|
+
// (set by the TaskGraphBuilder when it has generalized task data)
|
|
90
|
+
const rawAssertions = varResult.envelope.values.__assertions ?? [];
|
|
91
|
+
const { mapped: assertions, warnings: assertionWarnings } = mapAssertions(rawAssertions, { mode: options.mode, graderProvider: options.graderProvider });
|
|
92
|
+
warnings.push(...assertionWarnings);
|
|
93
|
+
// Build test case vars (exclude internal __ prefixed keys)
|
|
94
|
+
const vars = {};
|
|
95
|
+
for (const [key, value] of Object.entries(varResult.envelope.values)) {
|
|
96
|
+
if (!key.startsWith("__")) {
|
|
97
|
+
vars[key] = value;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
// Create the gold test case
|
|
101
|
+
const goldTest = {
|
|
102
|
+
description: node.taskId,
|
|
103
|
+
vars,
|
|
104
|
+
...(assertions.length > 0 ? { assert: assertions } : {}),
|
|
105
|
+
};
|
|
106
|
+
const tests = [goldTest];
|
|
107
|
+
// For literacy/baseline mode, also create a baseline variant (no docs).
|
|
108
|
+
// Route on the node's mode (from the task definition) rather than
|
|
109
|
+
// the global options.mode, so heterogeneous graphs compile correctly.
|
|
110
|
+
if (nodeMode === LiteracyVariant.STANDARD || nodeMode === "literacy") {
|
|
111
|
+
const baselineVars = { ...vars, docs: "" };
|
|
112
|
+
const baselineTest = {
|
|
113
|
+
description: `${node.taskId} [baseline]`,
|
|
114
|
+
vars: baselineVars,
|
|
115
|
+
prompts: ["without-docs"],
|
|
116
|
+
...(assertions.length > 0 ? { assert: assertions } : {}),
|
|
117
|
+
};
|
|
118
|
+
tests.push(baselineTest);
|
|
119
|
+
}
|
|
120
|
+
return tests;
|
|
121
|
+
}
|
|
122
|
+
// ---------------------------------------------------------------------------
|
|
123
|
+
// Provider assembly
|
|
124
|
+
// ---------------------------------------------------------------------------
|
|
125
|
+
/**
|
|
126
|
+
* Build the Promptfoo providers list from the model registry.
|
|
127
|
+
*/
|
|
128
|
+
function buildProviders(models, mode) {
|
|
129
|
+
const providers = [];
|
|
130
|
+
for (const model of models.models) {
|
|
131
|
+
// Check mode compatibility
|
|
132
|
+
if (model.modes && model.modes.length > 0) {
|
|
133
|
+
if (!modelMatchesMode(model, mode))
|
|
134
|
+
continue;
|
|
135
|
+
}
|
|
136
|
+
// Check env gate
|
|
137
|
+
if (model.env && !process.env[model.env]) {
|
|
138
|
+
continue; // Skip models whose API key isn't set
|
|
139
|
+
}
|
|
140
|
+
providers.push({
|
|
141
|
+
id: model.id,
|
|
142
|
+
label: model.label,
|
|
143
|
+
config: {
|
|
144
|
+
...models.defaults,
|
|
145
|
+
...model.config,
|
|
146
|
+
},
|
|
147
|
+
});
|
|
148
|
+
}
|
|
149
|
+
return providers;
|
|
150
|
+
}
|
|
151
|
+
/**
|
|
152
|
+
* Check if a model entry matches the current evaluation mode.
|
|
153
|
+
*
|
|
154
|
+
* Literacy mode defaults to baseline model matching. Variant-specific
|
|
155
|
+
* provider filtering is handled by the provider-assembler and
|
|
156
|
+
* generate-configs-step, not here.
|
|
157
|
+
*/
|
|
158
|
+
function modelMatchesMode(model, mode) {
|
|
159
|
+
if (!model.modes || model.modes.length === 0)
|
|
160
|
+
return true;
|
|
161
|
+
switch (mode) {
|
|
162
|
+
case "literacy":
|
|
163
|
+
return model.modes.includes(LiteracyVariant.STANDARD);
|
|
164
|
+
default:
|
|
165
|
+
// Non-literacy modes accept all models by default
|
|
166
|
+
return true;
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
// ---------------------------------------------------------------------------
|
|
170
|
+
// Prompt resolution
|
|
171
|
+
// ---------------------------------------------------------------------------
|
|
172
|
+
/**
|
|
173
|
+
* Resolve prompts with a three-level fallback chain:
|
|
174
|
+
* 1. handler.getPrompts() — mode-handler-owned templates
|
|
175
|
+
* 2. options.prompts — explicit caller-provided templates
|
|
176
|
+
* 3. buildDefaultPrompts() — built-in defaults per mode
|
|
177
|
+
*/
|
|
178
|
+
function resolvePrompts(options) {
|
|
179
|
+
// 1. Check handler-owned prompts
|
|
180
|
+
const handlerPrompts = options.handler?.getPrompts?.();
|
|
181
|
+
if (handlerPrompts && Object.keys(handlerPrompts).length > 0) {
|
|
182
|
+
return Object.values(handlerPrompts).map(promptTemplateToPromptfoo);
|
|
183
|
+
}
|
|
184
|
+
// 2. Check explicit override
|
|
185
|
+
if (options.prompts)
|
|
186
|
+
return options.prompts;
|
|
187
|
+
// 3. Built-in defaults
|
|
188
|
+
return buildDefaultPrompts(options.mode);
|
|
189
|
+
}
|
|
190
|
+
/**
 * Adapt a PromptTemplate (core port type) into a PromptfooPrompt
 * (compiler type): `id` and `label` carry over unchanged, and the
 * template text is exposed under `raw`.
 *
 * @param pt - Prompt template with `id`, `label` and `template`.
 * @returns The equivalent `{ id, label, raw }` entry.
 */
function promptTemplateToPromptfoo(pt) {
    const { id, label, template } = pt;
    return { id, label, raw: template };
}
|
|
196
|
+
// ---------------------------------------------------------------------------
|
|
197
|
+
// Default prompts
|
|
198
|
+
// ---------------------------------------------------------------------------
|
|
199
|
+
/**
 * Produce the built-in prompt entries for a mode.
 *
 * "literacy" gets a pair of prompts — one that appends the documentation
 * context and one that sends the task alone. Every other mode gets a
 * single prompt that sends the task alone.
 *
 * This is the fallback for modes that haven't yet migrated to
 * handler-owned prompts; prompts from a handler's getPrompts() take
 * precedence over these.
 *
 * @param mode - The evaluation mode.
 * @returns A freshly created array of `{ id, label, raw }` entries.
 */
function buildDefaultPrompts(mode) {
    if (mode !== "literacy") {
        return [
            {
                id: "default",
                label: "Default prompt",
                raw: "{{task}}",
            },
        ];
    }
    const withDocs = {
        id: "with-docs",
        label: "With documentation context",
        raw: "{{task}}\n\nDocumentation context:\n{{docs}}",
    };
    const withoutDocs = {
        id: "without-docs",
        label: "Without documentation context",
        raw: "{{task}}",
    };
    return [withDocs, withoutDocs];
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* provider-assembler.ts — Build per-mode provider arrays from models config.
|
|
3
|
+
*
|
|
4
|
+
* Replicates the provider-building logic from the legacy generate-configs.ts
|
|
5
|
+
* so the new compiler produces identical provider configurations.
|
|
6
|
+
*
|
|
7
|
+
* Separated into its own module so GenerateConfigsStep can import it
|
|
8
|
+
* without pulling in the full legacy generate-configs machinery.
|
|
9
|
+
*/
|
|
10
|
+
import { type ModelsConfig } from "../../_vendor/ailf-core/index.d.ts";
|
|
11
|
+
import type { ResolvedSourceConfig } from "../../sources.js";
|
|
12
|
+
/**
 * Provider arrays, one per literacy variant.
 *
 * The keys are literacy variant names, not EvalMode values. Each variant
 * needs its own set of model providers with variant-specific config
 * (e.g., agentic providers carry tool-use config, observed providers carry
 * observer instrumentation).
 */
export interface LiteracyVariantProviders {
    /** Providers for the baseline variant. */
    baseline: Array<Record<string, unknown>>;
    /** Providers for the agentic variant. */
    agentic: Array<Record<string, unknown>>;
    /** Providers for the observed variant. */
    observed: Array<Record<string, unknown>>;
}
|
|
25
|
+
/**
 * Legacy alias retained for backward compatibility.
 *
 * @deprecated Use LiteracyVariantProviders instead.
 */
export type AssembledProviders = LiteracyVariantProviders;
|
|
27
|
+
/**
 * Return value of `loadModelsAndProviders`: the loaded models config
 * together with the provider arrays assembled from it.
 */
export interface ModelsAndProviders {
    /** The loaded models configuration. */
    models: ModelsConfig;
    /** Provider arrays keyed by literacy variant. */
    providers: LiteracyVariantProviders;
}
|
|
32
|
+
/**
 * Load the models config and build the provider arrays for the literacy
 * variants.
 *
 * The provider arrays are keyed by literacy variant name (baseline,
 * agentic, observed) and are consumed by the YAML writer, which produces
 * one promptfoo config file per variant.
 *
 * @param rootDir - Root directory the models config is loaded from.
 * @param source - Optional resolved source configuration.
 * @param searchMode - Optional search mode.
 * @param allowedOrigins - Optional list of allowed origins.
 * @returns The models config plus the per-variant provider arrays.
 */
export declare function loadModelsAndProviders(
    rootDir: string,
    source?: ResolvedSourceConfig,
    searchMode?: string,
    allowedOrigins?: string[],
): ModelsAndProviders;
|