@sanity/ailf 0.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/config/features.ts +23 -0
- package/config/models.ts +95 -0
- package/config/prompts.ts +16 -0
- package/config/rubrics.ts +225 -0
- package/config/schedules.ts +47 -0
- package/config/sinks.ts +37 -0
- package/config/sources.ts +21 -0
- package/config/thresholds.ts +61 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
- package/dist/_vendor/ailf-core/config-helpers.js +170 -0
- package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
- package/dist/_vendor/ailf-core/env-helper.js +45 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/examples/index.js +25 -0
- package/dist/_vendor/ailf-core/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/index.js +5 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
- package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
- package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
- package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
- package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/services/index.js +2 -1
- package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
- package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
- package/dist/_vendor/ailf-core/services/scoring.js +25 -15
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
- package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
- package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
- package/dist/_vendor/ailf-core/types/index.js +8 -1
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
- package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
- package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
- package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
- package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
- package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
- package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
- package/dist/_vendor/ailf-core/types/trace.js +18 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
- package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
- package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
- package/dist/_vendor/ailf-shared/index.d.ts +0 -1
- package/dist/_vendor/ailf-shared/index.js +0 -1
- package/dist/adapters/api-client/build-request.js +14 -13
- package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
- package/dist/adapters/config-sources/file-config-adapter.js +39 -12
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
- package/dist/adapters/config-sources/ts-config-loader.js +141 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
- package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
- package/dist/adapters/task-sources/index.d.ts +3 -2
- package/dist/adapters/task-sources/index.js +3 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
- package/dist/adapters/task-sources/repo-schemas.js +227 -19
- package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
- package/dist/adapters/task-sources/repo-task-source.js +92 -80
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
- package/dist/adapters/task-sources/task-file-loader.js +83 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
- package/dist/adapters/task-sources/yaml-task-source.js +19 -16
- package/dist/cli.js +0 -2
- package/dist/commands/baseline.js +4 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/coverage-audit.js +9 -1
- package/dist/commands/explain-handler.js +25 -23
- package/dist/commands/fetch-docs.js +3 -2
- package/dist/commands/generate-configs.js +1 -1
- package/dist/commands/init.d.ts +6 -4
- package/dist/commands/init.js +302 -23
- package/dist/commands/interactive.js +11 -7
- package/dist/commands/pipeline-action.d.ts +2 -0
- package/dist/commands/pipeline-action.js +16 -6
- package/dist/commands/pipeline.d.ts +1 -0
- package/dist/commands/pipeline.js +4 -2
- package/dist/commands/pr-comment.js +1 -1
- package/dist/commands/publish.js +2 -2
- package/dist/commands/readiness-report.js +13 -6
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +13 -1
- package/dist/composition-root.js +99 -4
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +28 -8
- package/dist/orchestration/steps/calculate-scores-step.js +24 -11
- package/dist/orchestration/steps/fetch-docs-step.js +8 -7
- package/dist/orchestration/steps/gap-analysis-step.js +8 -7
- package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
- package/dist/orchestration/steps/generate-configs-step.js +261 -51
- package/dist/orchestration/steps/grader-consistency-step.js +7 -4
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/readiness-step.js +5 -6
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
- package/dist/orchestration/steps/run-eval-step.js +8 -7
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/cache.js +36 -8
- package/dist/pipeline/calculate-scores.d.ts +2 -4
- package/dist/pipeline/calculate-scores.js +43 -113
- package/dist/pipeline/checks.js +2 -2
- package/dist/pipeline/compare.js +8 -8
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
- package/dist/pipeline/compiler/assertion-mapper.js +175 -0
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
- package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
- package/dist/pipeline/compiler/config-loader.d.ts +56 -0
- package/dist/pipeline/compiler/config-loader.js +111 -0
- package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
- package/dist/pipeline/compiler/fixture-resolver.js +113 -0
- package/dist/pipeline/compiler/hash.d.ts +11 -0
- package/dist/pipeline/compiler/hash.js +18 -0
- package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
- package/dist/pipeline/compiler/ignore-fields.js +113 -0
- package/dist/pipeline/compiler/index.d.ts +29 -0
- package/dist/pipeline/compiler/index.js +45 -0
- package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
- package/dist/pipeline/compiler/literacy-bridge.js +172 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
- package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/index.d.ts +9 -0
- package/dist/pipeline/compiler/presets/index.js +8 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
- package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
- package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
- package/dist/pipeline/compiler/provider-assembler.js +137 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
- package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
- package/dist/pipeline/compiler/sandbox/index.js +11 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
- package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
- package/dist/pipeline/compiler/scoring-bridge.js +114 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
- package/dist/pipeline/compiler/task-graph-builder.js +291 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
- package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
- package/dist/pipeline/compiler/telemetry/index.js +19 -0
- package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
- package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
- package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
- package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
- package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
- package/dist/pipeline/compiler/variable-resolver.js +115 -0
- package/dist/pipeline/coverage-audit.d.ts +15 -5
- package/dist/pipeline/coverage-audit.js +41 -22
- package/dist/pipeline/eval-constants.d.ts +16 -6
- package/dist/pipeline/eval-constants.js +25 -4
- package/dist/pipeline/eval-fingerprint.d.ts +2 -2
- package/dist/pipeline/eval-fingerprint.js +8 -9
- package/dist/pipeline/expand-tasks.d.ts +19 -10
- package/dist/pipeline/expand-tasks.js +34 -28
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +2 -2
- package/dist/pipeline/generate-configs.d.ts +22 -4
- package/dist/pipeline/generate-configs.js +53 -24
- package/dist/pipeline/grader-api.d.ts +3 -3
- package/dist/pipeline/grader-api.js +5 -12
- package/dist/pipeline/grader-compare-runner.js +20 -27
- package/dist/pipeline/grader-comparison.d.ts +4 -8
- package/dist/pipeline/grader-comparison.js +11 -17
- package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
- package/dist/pipeline/grader-consistency-runner.js +16 -20
- package/dist/pipeline/grader-consistency.d.ts +6 -10
- package/dist/pipeline/grader-consistency.js +13 -32
- package/dist/pipeline/grader-sensitivity-runner.js +7 -5
- package/dist/pipeline/grader-sensitivity.d.ts +2 -6
- package/dist/pipeline/grader-sensitivity.js +10 -10
- package/dist/pipeline/grader-validate-runner.js +7 -5
- package/dist/pipeline/grader-validation.d.ts +2 -6
- package/dist/pipeline/grader-validation.js +14 -22
- package/dist/pipeline/map-request-to-config.js +7 -1
- package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
- package/dist/pipeline/mirror-repo-tasks.js +22 -21
- package/dist/pipeline/normalize-mode.d.ts +49 -0
- package/dist/pipeline/normalize-mode.js +64 -0
- package/dist/pipeline/plan.d.ts +5 -2
- package/dist/pipeline/plan.js +134 -78
- package/dist/pipeline/pr-comment.js +2 -0
- package/dist/pipeline/profile-resolution.d.ts +22 -14
- package/dist/pipeline/profile-resolution.js +41 -19
- package/dist/pipeline/provenance.d.ts +2 -2
- package/dist/pipeline/provenance.js +12 -17
- package/dist/pipeline/release-report.js +4 -4
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/rubric-loader.d.ts +20 -0
- package/dist/pipeline/rubric-loader.js +37 -0
- package/dist/pipeline/validate.d.ts +4 -4
- package/dist/pipeline/validate.js +64 -53
- package/dist/schedules/loader.js +18 -8
- package/dist/scripts/migrate-task-mode.d.ts +24 -0
- package/dist/scripts/migrate-task-mode.js +85 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +15 -15
- package/dist/sinks/loader.js +5 -7
- package/dist/sources.d.ts +7 -7
- package/dist/sources.js +22 -24
- package/dist/webhook/dispatch.js +2 -1
- package/package.json +15 -4
- package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
- package/tasks/literacy/frameworks.task.ts +128 -0
- package/tasks/literacy/functions.task.ts +69 -0
- package/tasks/literacy/groq.task.ts +258 -0
- package/tasks/literacy/nextjs-live.task.ts +75 -0
- package/tasks/literacy/studio-setup.task.ts +131 -0
- package/tasks/literacy/visual-editing.task.ts +146 -0
- package/config/features.yaml +0 -116
- package/config/models.yaml +0 -116
- package/config/prompts.yaml +0 -75
- package/config/rubrics.yaml +0 -81
- package/config/schedules.yaml +0 -43
- package/config/sinks.yaml +0 -54
- package/config/sources.yaml +0 -51
- package/config/thresholds.yaml +0 -49
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Assertion type mapper — maps AILF assertion types to Promptfoo assertion types.
|
|
3
|
+
*
|
|
4
|
+
* AILF assertions have two flavors:
|
|
5
|
+
* 1. Templated assertions (`type: "llm-rubric"` with `template` + `criteria`)
|
|
6
|
+
* → resolved into Promptfoo's `llm-rubric` with a fully assembled rubric prompt
|
|
7
|
+
* 2. Value assertions (any other `type` with a `value`)
|
|
8
|
+
* → passed through to Promptfoo mostly as-is
|
|
9
|
+
*
|
|
10
|
+
* This module handles the mapping for both, validates mode compatibility
|
|
11
|
+
* (e.g., `tool-called` is only valid for agent-harness, mcp-server, and literacy modes),
|
|
12
|
+
* and normalizes weight fields.
|
|
13
|
+
*
|
|
14
|
+
* @see docs/design-docs/architecture-overhaul/scoring-rubrics-assertions.md
|
|
15
|
+
* @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
|
|
16
|
+
*/
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Known assertion types and their mode compatibility
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
/**
 * Assertion types supported by AILF, keyed by AILF type name, with the
 * Promptfoo assertion type each one compiles to as the value.
 *
 * The table is built on a null-prototype object so that lookups with
 * arbitrary type names (e.g. "constructor", "toString") never resolve to
 * members inherited from `Object.prototype`. It is frozen so the lookup
 * table cannot be altered at runtime.
 */
const ASSERTION_TYPE_MAP = Object.freeze(Object.assign(Object.create(null), {
    // Deterministic
    contains: "contains",
    "contains-all": "contains-all",
    "contains-any": "contains-any",
    "contains-json": "contains-json",
    cost: "cost",
    equals: "equals",
    "is-json": "is-json",
    "is-valid-openai-function-call": "is-valid-openai-function-call",
    "is-valid-openai-tools-call": "is-valid-openai-tools-call",
    latency: "latency",
    regex: "regex",
    // Programmatic
    javascript: "javascript",
    python: "python",
    // LLM-graded
    "g-eval": "g-eval",
    "llm-rubric": "llm-rubric",
    "model-graded-closedqa": "model-graded-closedqa",
    "model-graded-factuality": "model-graded-factuality",
    similar: "similar",
    // Tool-use (agent/MCP modes)
    "skill-used": "javascript", // Custom JS assertion checking tool calls
    "tool-call-f1": "javascript", // Custom JS assertion computing F1
    "tool-called": "javascript", // Custom JS assertion checking specific tool
}));
|
|
48
|
+
/**
 * Assertion types restricted to specific modes, keyed by base assertion type
 * with the list of modes that may use it as the value. A type that is not a
 * key of this table is unrestricted.
 *
 * "literacy" is included because the agentic literacy variant uses tool calls.
 *
 * The table is built on a null-prototype object so that a lookup with an
 * arbitrary type name (e.g. "constructor") yields `undefined` instead of a
 * member inherited from `Object.prototype`.
 */
const MODE_RESTRICTED_TYPES = Object.freeze(Object.assign(Object.create(null), {
    "skill-used": ["agent-harness", "mcp-server", "literacy"],
    "tool-call-f1": ["agent-harness", "mcp-server", "literacy"],
    "tool-called": ["agent-harness", "mcp-server", "literacy"],
}));
|
|
55
|
+
/**
 * Base assertion types that may be written with Promptfoo's `not-` prefix.
 * Membership is checked against the type with the prefix already removed.
 */
const NEGATABLE_TYPES = new Set(["contains", "contains-all", "contains-any", "equals", "is-json", "regex"]);
|
|
64
|
+
/**
 * Map an array of AILF assertions to Promptfoo assertions.
 *
 * Each assertion is mapped independently by `mapSingleAssertion`. A result
 * may carry a mapped assertion, a warning, or both; mapped assertions are
 * collected in input order and warnings are collected alongside them.
 * A missing (`null`/`undefined`) assertion list is treated as empty instead
 * of throwing.
 *
 * @param assertions - AILF assertion definitions
 * @param options - Mapper options
 * @returns Mapped Promptfoo assertions and any warnings
 */
export function mapAssertions(assertions, options) {
    const mapped = [];
    const warnings = [];
    // `?? []` lets callers pass through an absent assertion list unchanged.
    for (const assertion of assertions ?? []) {
        const { assertion: promptfooAssertion, warning } = mapSingleAssertion(assertion, options);
        if (warning) {
            warnings.push(warning);
        }
        if (promptfooAssertion) {
            mapped.push(promptfooAssertion);
        }
    }
    return { mapped, warnings };
}
|
|
85
|
+
/**
 * Check if an assertion type is valid.
 *
 * A leading `not-` prefix is stripped before the lookup, so `not-contains`
 * is checked as `contains`. Only the table's own keys count: names that
 * exist solely on `Object.prototype` (e.g. "toString", "constructor") are
 * not valid assertion types. A non-string type is reported as invalid.
 *
 * @param type - AILF assertion type, optionally prefixed with `not-`
 * @returns `true` when the base type is a key of `ASSERTION_TYPE_MAP`
 */
export function isValidAssertionType(type) {
    if (typeof type !== "string") {
        return false;
    }
    const baseType = type.startsWith("not-") ? type.slice(4) : type;
    // Own-property check: the `in` operator would also match inherited members.
    return Object.prototype.hasOwnProperty.call(ASSERTION_TYPE_MAP, baseType);
}
|
|
92
|
+
/**
 * Check if an assertion type is compatible with a given mode.
 *
 * A leading `not-` prefix is stripped before the lookup. Types that are not
 * an own key of `MODE_RESTRICTED_TYPES` are unrestricted and compatible with
 * every mode; restricted types are compatible only with the modes listed
 * for them.
 *
 * @param type - AILF assertion type, optionally prefixed with `not-`
 * @param mode - Evaluation mode to check against
 * @returns `true` when the type may be used in `mode`
 */
export function isAssertionCompatibleWithMode(type, mode) {
    const baseType = type.startsWith("not-") ? type.slice(4) : type;
    // Own-property check: a plain index lookup would return inherited members
    // (e.g. `Object.prototype.constructor`) and then fail on `.includes`.
    if (!Object.prototype.hasOwnProperty.call(MODE_RESTRICTED_TYPES, baseType)) {
        return true; // No restrictions
    }
    return MODE_RESTRICTED_TYPES[baseType].includes(mode);
}
|
|
102
|
+
// ---------------------------------------------------------------------------
|
|
103
|
+
// Internal mapping
|
|
104
|
+
// ---------------------------------------------------------------------------
|
|
105
|
+
/**
 * Map one AILF assertion to its Promptfoo form.
 *
 * Checks run in this order:
 *  1. When `options.mode` is set, a type that is incompatible with the mode
 *     is skipped with a warning.
 *  2. A `not-` prefixed type whose base type is not in `NEGATABLE_TYPES` is
 *     skipped with a warning.
 *  3. A templated `llm-rubric` assertion is handed to `mapTemplatedAssertion`.
 *  4. Any other type is looked up in `ASSERTION_TYPE_MAP`; an unknown type is
 *     passed through under its original name together with a warning.
 *
 * @param assertion - AILF assertion definition (must carry a string `type`)
 * @param options - Mapper options (`mode` is read here; the whole object is
 *   forwarded to `mapTemplatedAssertion`)
 * @returns `{ assertion, warning }`, where `assertion` is `null` when the
 *   input was skipped and `warning` is `null` when there is nothing to report
 */
function mapSingleAssertion(assertion, options) {
    const { type } = assertion;
    // Check mode compatibility
    if (options?.mode && !isAssertionCompatibleWithMode(type, options.mode)) {
        return {
            assertion: null,
            warning: `Assertion type "${type}" is not compatible with mode "${options.mode}" — skipped`,
        };
    }
    // Handle negation
    const isNegated = type.startsWith("not-");
    const baseType = isNegated ? type.slice(4) : type;
    if (isNegated && !NEGATABLE_TYPES.has(baseType)) {
        return {
            assertion: null,
            warning: `Assertion type "${type}" does not support negation — skipped`,
        };
    }
    // Templated assertions (llm-rubric with template + criteria)
    if (isTemplated(assertion)) {
        return {
            assertion: mapTemplatedAssertion(assertion, options),
            warning: null,
        };
    }
    // Value assertions — map type and pass through.
    // Own-property lookup: a plain index would resolve names such as
    // "constructor" to inherited members and emit them as the mapped type.
    const promptfooType = Object.prototype.hasOwnProperty.call(ASSERTION_TYPE_MAP, baseType)
        ? ASSERTION_TYPE_MAP[baseType]
        : undefined;
    if (!promptfooType) {
        // Unknown type — pass through as-is (Promptfoo may support it natively)
        return {
            assertion: mapValueAssertion(assertion, type),
            warning: `Unknown assertion type "${type}" — passed through to Promptfoo as-is`,
        };
    }
    const mappedType = isNegated ? `not-${promptfooType}` : promptfooType;
    return {
        assertion: mapValueAssertion(assertion, mappedType),
        warning: null,
    };
}
|
|
147
|
+
/**
 * Tell whether an assertion is a templated rubric: its type is exactly
 * "llm-rubric" and it has both a `template` and a `criteria` key.
 * Only key presence is checked, not the values.
 */
function isTemplated(assertion) {
    if (assertion.type !== "llm-rubric") {
        return false;
    }
    return ["template", "criteria"].every((key) => key in assertion);
}
|
|
152
|
+
/**
 * Build the Promptfoo `llm-rubric` assertion for a templated AILF assertion.
 *
 * The rubric prompt will be fully assembled by the PromptfooCompiler using
 * rubric templates. Here the template ref and the criteria are encoded into
 * `value` as `[template:<ref>] <criterion>; <criterion>; ...` so the compiler
 * can resolve them.
 *
 * `isTemplated` only checks that a `criteria` key exists, so the value is
 * normalised here: an array is joined as-is, a single non-array value is
 * treated as a one-item list, and `null`/`undefined` as an empty list.
 *
 * @param assertion - Templated assertion (`template`, `criteria`, optional `weight`)
 * @param options - Mapper options; `graderProvider`, when set, becomes `provider`
 * @returns Promptfoo assertion with `type`, `value`, and optionally `weight`
 *   and `provider`
 */
function mapTemplatedAssertion(assertion, options) {
    const { template, criteria, weight } = assertion;
    let criteriaList;
    if (Array.isArray(criteria)) {
        criteriaList = criteria;
    }
    else if (criteria == null) {
        criteriaList = [];
    }
    else {
        criteriaList = [criteria];
    }
    return {
        type: "llm-rubric",
        value: `[template:${template}] ${criteriaList.join("; ")}`,
        // A weight of 0 is meaningful, so only `undefined` is treated as absent.
        ...(weight !== undefined ? { weight } : {}),
        ...(options?.graderProvider ? { provider: options.graderProvider } : {}),
    };
}
|
|
168
|
+
/**
 * Produce a Promptfoo value assertion: a shallow copy of the input whose
 * `type` is replaced by `mappedType`, with every other field carried over
 * unchanged. The input object is not modified.
 */
function mapValueAssertion(assertion, mappedType) {
    const passthrough = { ...assertion };
    delete passthrough.type;
    return { type: mappedType, ...passthrough };
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* compiler-to-yaml.ts — Serialize compiled Promptfoo config to YAML files.
|
|
3
|
+
*
|
|
4
|
+
* This is the bridge between the new compiler pipeline (in-memory
|
|
5
|
+
* CompiledPromptfooConfig) and the existing RunEvalStep which reads
|
|
6
|
+
* YAML config files from disk.
|
|
7
|
+
*
|
|
8
|
+
* The output YAML files are identical in structure to what the legacy
|
|
9
|
+
* generate-configs.ts produces, so RunEvalStep, CalculateScoresStep,
|
|
10
|
+
* and all downstream steps work without modification.
|
|
11
|
+
*
|
|
12
|
+
* @see packages/eval/src/pipeline/generate-configs.ts — legacy path
|
|
13
|
+
* @see packages/eval/src/orchestration/steps/run-eval-step.ts — consumer
|
|
14
|
+
*/
|
|
15
|
+
import type { Logger, ModeCompileResult } from "../../_vendor/ailf-core/index.d.ts";
|
|
16
|
+
/**
 * Settings shared by the writers that serialize compiled configs to YAML.
 */
export interface WriteCompiledConfigOptions {
    /** Root directory of the eval package; output paths are resolved against it. */
    rootDir: string;
    /** Provider ID used for grading (e.g., "openai:chat:gpt-5"). */
    graderProvider: string;
    /** Optional maximum concurrency for evaluation. */
    maxConcurrency?: number;
    /** Optional logger instance. */
    logger?: Logger;
}
|
|
27
|
+
/**
 * Write a single Promptfoo config YAML file for any non-literacy mode.
 *
 * This is the generic writer that works with any ModeCompileResult.
 * Literacy uses its own multi-file writer, writeCompiledLiteracyConfigs.
 *
 * @param result - Compiled output for the mode
 * @param mode - Mode name, used in the generated file names
 * @param options - Root directory, grader provider, optional concurrency and logger
 */
export declare function writeCompiledModeConfig(result: ModeCompileResult, mode: string, options: WriteCompiledConfigOptions): void;
|
|
40
|
+
/**
 * Write the literacy-specific Promptfoo config YAML files:
 *
 * - promptfooconfig.yaml (baseline config)
 * - promptfooconfig.agentic.yaml (agentic config)
 * - promptfooconfig.observed.yaml (observed config — same structure as baseline)
 *
 * @param baselineResult - Compiled result used for the baseline and observed configs
 * @param agenticResult - Compiled result used for the agentic config
 * @param providers - Provider entries for each of the three configs
 * @param options - Root directory, grader provider, optional concurrency and logger
 */
export declare function writeCompiledLiteracyConfigs(
    baselineResult: ModeCompileResult,
    agenticResult: ModeCompileResult,
    providers: {
        baseline: Record<string, unknown>[];
        agentic: Record<string, unknown>[];
        observed: Record<string, unknown>[];
    },
    options: WriteCompiledConfigOptions,
): void;
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* compiler-to-yaml.ts — Serialize compiled Promptfoo config to YAML files.
|
|
3
|
+
*
|
|
4
|
+
* This is the bridge between the new compiler pipeline (in-memory
|
|
5
|
+
* CompiledPromptfooConfig) and the existing RunEvalStep which reads
|
|
6
|
+
* YAML config files from disk.
|
|
7
|
+
*
|
|
8
|
+
* The output YAML files are identical in structure to what the legacy
|
|
9
|
+
* generate-configs.ts produces, so RunEvalStep, CalculateScoresStep,
|
|
10
|
+
* and all downstream steps work without modification.
|
|
11
|
+
*
|
|
12
|
+
* @see packages/eval/src/pipeline/generate-configs.ts — legacy path
|
|
13
|
+
* @see packages/eval/src/orchestration/steps/run-eval-step.ts — consumer
|
|
14
|
+
*/
|
|
15
|
+
import { existsSync, mkdirSync, writeFileSync } from "fs";
|
|
16
|
+
import { resolve } from "path";
|
|
17
|
+
import { dump } from "js-yaml";
|
|
18
|
+
/**
 * Advisory URL-extraction assertion (weight 0), matching the legacy
 * generate-configs.ts behavior.
 *
 * The embedded script collects the distinct http(s) URLs found in `output`
 * and always passes with score 1. Its `reason` is a JSON string holding the
 * sanity.io URLs, the remaining URLs, and both counts, for telemetry.
 */
const URL_EXTRACTION_ASSERT = {
    type: "javascript",
    // String.raw keeps the regex backslashes literal in the emitted script.
    value: String.raw`const urlPattern = /https?:\/\/[^\s\)\"\'\`>]+/g;
const urls = [...new Set((output.match(urlPattern) || []))];
const sanityUrls = urls.filter(u => u.includes('sanity.io'));
return {
pass: true,
score: 1,
reason: JSON.stringify({
sanityUrls,
otherUrls: urls.filter(u => !u.includes('sanity.io')),
totalUrlCount: urls.length,
sanityUrlCount: sanityUrls.length
})
};`,
    weight: 0,
};
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
// Public API
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
/**
 * Write a single Promptfoo config YAML file for any non-literacy mode.
 *
 * This is the generic writer that works with any ModeCompileResult.
 * Literacy uses its own multi-file writer (writeCompiledLiteracyConfigs).
 *
 * Files written, relative to `options.rootDir`:
 * - results/latest/<mode>-tests.json (expanded test entries, as JSON)
 * - promptfooconfig.<mode>.yaml (config that references the tests file)
 *
 * @param result - Compiled output: `tests`, `providers`, `prompts`, and
 *   optionally `extras.extensions`
 * @param mode - Mode name, used in file names and in the config description
 * @param options - Root directory, grader provider, optional concurrency and logger
 */
export function writeCompiledModeConfig(result, mode, options) {
    const filename = `promptfooconfig.${mode}.yaml`;
    const outputPath = `results/latest/eval-results-${mode}.json`;
    // Ensure results directory exists; a recursive mkdir succeeds when the
    // directory is already there, so no separate existence check is needed.
    mkdirSync(resolve(options.rootDir, "results/latest"), { recursive: true });
    // Write expanded test entries to a JSON file
    const testsFilename = `results/latest/${mode}-tests.json`;
    const testsPath = resolve(options.rootDir, testsFilename);
    const expandedTests = testsToExpandedEntries(result.tests);
    writeFileSync(testsPath, JSON.stringify(expandedTests, null, 2), "utf-8");
    // Build grader options
    const graderOpts = {};
    if (options.graderProvider) {
        graderOpts.provider = options.graderProvider;
    }
    // Build provider entries
    const providerEntries = result.providers.map((p) => {
        if (p.config) {
            // Emit `label` only when one was supplied, so the entry never
            // carries an explicit `undefined` value into the YAML dumper.
            return p.label !== undefined
                ? { id: p.id, label: p.label, config: p.config }
                : { id: p.id, config: p.config };
        }
        return p.label ? { id: p.id, label: p.label } : p.id;
    });
    // Build prompt entries
    const prompts = result.prompts.map((p) => ({
        id: p.id,
        label: p.label,
        raw: p.raw,
    }));
    // Assemble and write
    const config = assembleConfig({
        description: `AILF ${mode} evaluation`,
        graderOpts,
        maxConcurrency: options.maxConcurrency,
        outputPath,
        prompts,
        providers: providerEntries,
        tests: [testsFilename],
    });
    // Include extensions if present (agent-harness mode)
    if (result.extras?.extensions) {
        config.extensions = result.extras.extensions;
    }
    writeConfig(options.rootDir, filename, config, options.logger);
}
|
|
103
|
+
/**
 * Write the literacy outputs under `options.rootDir`.
 *
 * Expanded test entries:
 * - tasks/.expanded.yaml (from `baselineResult.tests`)
 * - tasks/.expanded.agentic.yaml (from `agenticResult.tests`)
 *
 * Promptfoo configs, written in this order:
 * - promptfooconfig.yaml (baseline config)
 * - promptfooconfig.observed.yaml (observed config — same structure as baseline)
 * - promptfooconfig.agentic.yaml (agentic config)
 */
export function writeCompiledLiteracyConfigs(baselineResult, agenticResult, providers, options) {
    const { rootDir, graderProvider, logger: log } = options;
    // Ensure tasks/ directory exists
    const tasksDir = resolve(rootDir, "tasks");
    if (!existsSync(tasksDir)) {
        mkdirSync(tasksDir, { recursive: true });
    }
    // Expanded test entries (same format as expand-tasks.ts output)
    const expandedFiles = [
        {
            name: ".expanded.yaml",
            entries: testsToExpandedEntries(baselineResult.tests),
            header: "# .expanded.yaml\n#\n# AUTO-GENERATED by compiler pipeline — do not edit directly.\n# Run: npx @sanity/ailf generate-configs\n",
        },
        {
            name: ".expanded.agentic.yaml",
            entries: testsToExpandedEntries(agenticResult.tests),
            header: "# .expanded.agentic.yaml\n#\n# AUTO-GENERATED by compiler pipeline — do not edit directly.\n# Gold entries only (no baseline) for agentic evaluation mode.\n# Run: npx @sanity/ailf generate-configs\n",
        },
    ];
    for (const { name, entries, header } of expandedFiles) {
        writeYaml(resolve(rootDir, "tasks", name), entries, header);
        log?.info(` ✓ tasks/${name} (${entries.length} entries)`);
    }
    // Settings shared by all three promptfoo configs
    const shared = {
        graderOpts: {
            provider: graderProvider,
            rubricProvider: graderProvider,
        },
        maxConcurrency: options.maxConcurrency,
    };
    const baselineTests = ["file://tasks/.expanded.yaml"];
    const variants = [
        {
            file: "promptfooconfig.yaml",
            description: "Sanity AI Literacy Evaluation — Baseline",
            prompts: baselineResult.prompts,
            providers: providers.baseline,
            tests: baselineTests,
            outputPath: "results/latest/eval-results.json",
        },
        {
            // Same structure as baseline, different providers
            file: "promptfooconfig.observed.yaml",
            description: "Sanity AI Literacy Evaluation — Observed",
            prompts: baselineResult.prompts,
            providers: providers.observed,
            tests: baselineTests,
            outputPath: "results/latest/eval-results-observed.json",
        },
        {
            file: "promptfooconfig.agentic.yaml",
            description: "Sanity AI Literacy Evaluation — Agentic (naive vs optimized)",
            prompts: agenticResult.prompts,
            providers: providers.agentic,
            tests: ["file://tasks/.expanded.agentic.yaml"],
            outputPath: "results/latest/eval-results-agentic.json",
        },
    ];
    for (const { file, ...spec } of variants) {
        writeConfig(rootDir, file, assembleConfig({ ...spec, ...shared }), log);
    }
    log?.info("\nDone! Configs are ready (via compiler pipeline).");
}
|
|
167
|
+
// ---------------------------------------------------------------------------
|
|
168
|
+
// Helpers
|
|
169
|
+
// ---------------------------------------------------------------------------
|
|
170
|
+
/**
 * Convert compiler test cases to the expanded entry format (matching legacy YAML).
 *
 * Each entry keeps only `description`, `vars`, `assert` and `prompts`, in
 * that order. A field is dropped when it is falsy, and `assert` is also
 * dropped when it is an empty array.
 */
function testsToExpandedEntries(tests) {
    return tests.map(({ description, vars, assert: assertions, prompts }) => ({
        ...(description ? { description } : {}),
        ...(vars ? { vars } : {}),
        ...(assertions && assertions.length > 0 ? { assert: assertions } : {}),
        ...(prompts ? { prompts } : {}),
    }));
}
|
|
185
|
+
/**
 * Build the top-level Promptfoo config object from the given pieces.
 *
 * The URL-extraction assertion is always attached to `defaultTest`, and
 * `evaluateOptions.maxConcurrency` is included only when
 * `opts.maxConcurrency` is truthy. Keys are inserted in a fixed order.
 */
function assembleConfig(opts) {
    const { description, graderOpts, maxConcurrency, outputPath, prompts, providers, tests } = opts;
    const concurrency = maxConcurrency
        ? { evaluateOptions: { maxConcurrency } }
        : {};
    const defaultTest = {
        assert: [URL_EXTRACTION_ASSERT],
        options: graderOpts,
    };
    return Object.assign(
        { commandLineOptions: { table: false }, defaultTest, description },
        concurrency,
        { outputPath, prompts, providers, tests },
    );
}
|
|
202
|
+
/**
 * Serialize `config` to YAML and write it to `<rootDir>/<filename>` behind
 * an auto-generated banner, then log a confirmation if a logger was given.
 */
function writeConfig(rootDir, filename, config, log) {
    const serialized = dump(config, {
        forceQuotes: false,
        lineWidth: 120,
        noRefs: true,
        quotingType: "'",
    });
    const banner = [
        `# ${filename}`,
        "#",
        "# AUTO-GENERATED by compiler pipeline — do not edit directly.",
        "# Run: npx @sanity/ailf generate-configs",
        "",
    ].join("\n");
    writeFileSync(resolve(rootDir, filename), `${banner}\n${serialized}`, "utf-8");
    log?.info(` ✓ ${filename}`);
}
|
|
214
|
+
/**
 * Dump `data` as YAML and write it to `path`, preceded by `header` and a
 * blank line.
 */
function writeYaml(path, data, header) {
    const dumpOptions = {
        forceQuotes: false,
        lineWidth: 120,
        noRefs: true,
        quotingType: "'",
    };
    const body = dump(data, dumpOptions);
    writeFileSync(path, `${header}\n${body}`, "utf-8");
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified config file loader — resolves TS/JS/YAML/JSON with format priority.
|
|
3
|
+
*
|
|
4
|
+
* Provides a single `loadConfigFile()` function that replaces the scattered
|
|
5
|
+
* `readFileSync + load(raw)` pattern throughout the pipeline. Checks for
|
|
6
|
+
* TypeScript/JavaScript config files first (via jiti), falling back to YAML
|
|
7
|
+
* and JSON — matching the resolution order established in Phase 1.
|
|
8
|
+
*
|
|
9
|
+
* Priority: `.ts` > `.js` > `.yaml` > `.yml` > `.json`
|
|
10
|
+
*
|
|
11
|
+
* Usage:
|
|
12
|
+
* ```ts
|
|
13
|
+
* // Before (scattered pattern):
|
|
14
|
+
* const raw = readFileSync(resolve(rootDir, "config", "models.yaml"), "utf-8")
|
|
15
|
+
* const config = load(raw) as ModelsConfig
|
|
16
|
+
*
|
|
17
|
+
* // After:
|
|
18
|
+
* const config = loadConfigFile<ModelsConfig>("models", rootDir)
|
|
19
|
+
* ```
|
|
20
|
+
*
|
|
21
|
+
* @see packages/eval/src/adapters/config-sources/ts-config-loader.ts — jiti loader
|
|
22
|
+
* @see docs/design-docs/architecture-overhaul/typescript-configuration.md
|
|
23
|
+
*/
|
|
24
|
+
/** Outcome of loading a config file: the data plus where it came from. */
export interface ConfigLoadResult<T> {
    /** Parsed configuration data. */
    data: T;
    /** Path of the file that was loaded. */
    filePath: string;
    /** Format of the loaded file (`.yaml` and `.yml` both report "yaml"). */
    format: "ts" | "js" | "yaml" | "json";
}
|
|
32
|
+
/**
 * Load a config file by name, searching for TS/JS/YAML/JSON variants.
 *
 * @param name - Config name without extension (e.g., "models", "rubrics")
 * @param rootDir - Package root directory containing the config subdirectory
 * @param options - Optional settings; `subdir` overrides the default "config" subdirectory
 * @returns The parsed data together with the path and format of the file that was loaded
 * @throws ConfigNotFoundError if no variant of the file exists; errors raised
 *   while loading or parsing an existing file propagate unchanged
 */
export declare function loadConfigFile<T>(name: string, rootDir: string, options?: {
    subdir?: string;
}): ConfigLoadResult<T>;
|
|
44
|
+
/**
 * Try to load a config file, returning null if not found.
 *
 * Takes the same arguments as loadConfigFile. Only the not-found case is
 * turned into `null`; errors from loading or parsing an existing file are
 * still thrown.
 */
export declare function tryLoadConfigFile<T>(name: string, rootDir: string, options?: {
    subdir?: string;
}): ConfigLoadResult<T> | null;
|
|
51
|
+
/** Error signalling that no config file was found (kept distinct from parse errors). */
export declare class ConfigNotFoundError extends Error {
    /** Name of the config that was looked up. */
    readonly configName: string;
    /** Directory that was searched. */
    readonly searchDir: string;
    constructor(configName: string, searchDir: string);
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified config file loader — resolves TS/JS/YAML/JSON with format priority.
|
|
3
|
+
*
|
|
4
|
+
* Provides a single `loadConfigFile()` function that replaces the scattered
|
|
5
|
+
* `readFileSync + load(raw)` pattern throughout the pipeline. Checks for
|
|
6
|
+
* TypeScript/JavaScript config files first (via jiti), falling back to YAML
|
|
7
|
+
* and JSON — matching the resolution order established in Phase 1.
|
|
8
|
+
*
|
|
9
|
+
* Priority: `.ts` > `.js` > `.yaml` > `.yml` > `.json`
|
|
10
|
+
*
|
|
11
|
+
* Usage:
|
|
12
|
+
* ```ts
|
|
13
|
+
* // Before (scattered pattern):
|
|
14
|
+
* const raw = readFileSync(resolve(rootDir, "config", "models.yaml"), "utf-8")
|
|
15
|
+
* const config = load(raw) as ModelsConfig
|
|
16
|
+
*
|
|
17
|
+
* // After:
|
|
18
|
+
* const config = loadConfigFile<ModelsConfig>("models", rootDir)
|
|
19
|
+
* ```
|
|
20
|
+
*
|
|
21
|
+
* @see packages/eval/src/adapters/config-sources/ts-config-loader.ts — jiti loader
|
|
22
|
+
* @see docs/design-docs/architecture-overhaul/typescript-configuration.md
|
|
23
|
+
*/
|
|
24
|
+
import { createRequire } from "module";
|
|
25
|
+
import { existsSync, readFileSync } from "fs";
|
|
26
|
+
import { load } from "js-yaml";
|
|
27
|
+
import { resolve } from "path";
|
|
28
|
+
/**
 * Load a config file by name, searching for TS/JS/YAML/JSON variants.
 *
 * The first existing candidate wins, in the order
 * `.ts` > `.js` > `.yaml` > `.yml` > `.json`.
 *
 * @param name - Config name without extension (e.g., "models", "rubrics")
 * @param rootDir - Package root directory containing the config subdirectory
 * @param options - Optional settings; `subdir` overrides the default "config" subdirectory
 * @returns `{ data, filePath, format }` for the file that was loaded
 * @throws ConfigNotFoundError if no candidate exists; errors raised while
 *   loading or parsing an existing file propagate unchanged
 */
export function loadConfigFile(name, rootDir, options) {
    const subdir = options?.subdir ?? "config";
    const searchDir = resolve(rootDir, subdir);
    const basePath = resolve(searchDir, name);
    // Priority chain: .ts > .js > .yaml > .yml > .json
    const candidates = [
        { ext: ".ts", format: "ts" },
        { ext: ".js", format: "js" },
        { ext: ".yaml", format: "yaml" },
        { ext: ".yml", format: "yaml" },
        { ext: ".json", format: "json" },
    ];
    for (const { ext, format } of candidates) {
        const filePath = basePath + ext;
        if (!existsSync(filePath))
            continue;
        switch (format) {
            case "ts":
            case "js":
                return loadTsFile(filePath, format);
            case "yaml":
                return loadYamlFile(filePath);
            case "json":
                return loadJsonFile(filePath);
        }
    }
    // Always throw ConfigNotFoundError so tryLoadConfigFile can catch it
    throw new ConfigNotFoundError(name, searchDir);
}
|
|
65
|
+
/**
 * Try to load a config file, returning null if not found.
 *
 * Delegates to loadConfigFile and converts only ConfigNotFoundError into
 * `null`; any other error (for example a parse failure) is rethrown.
 */
export function tryLoadConfigFile(name, rootDir, options) {
    try {
        return loadConfigFile(name, rootDir, options);
    }
    catch (err) {
        if (err instanceof ConfigNotFoundError) {
            return null;
        }
        throw err;
    }
}
|
|
79
|
+
/**
 * Error thrown when no config file variant is found (kept distinct from
 * parse errors). Records the config name and the directory searched.
 */
export class ConfigNotFoundError extends Error {
    configName;
    searchDir;
    constructor(configName, searchDir) {
        super(`Config "${configName}" not found in ${searchDir}`);
        Object.assign(this, { configName, searchDir, name: "ConfigNotFoundError" });
    }
}
|
|
90
|
+
// ---------------------------------------------------------------------------
|
|
91
|
+
// Format-specific loaders
|
|
92
|
+
// ---------------------------------------------------------------------------
|
|
93
|
+
/**
 * Load a TS/JS config module through jiti and return its default export,
 * or the module itself when `default` is null or undefined.
 */
function loadTsFile(filePath, format) {
    // jiti supports sync loading; it is obtained via createRequire for ESM compatibility.
    const nodeRequire = createRequire(import.meta.url);
    const jitiFactory = nodeRequire("jiti").createJiti;
    const loaded = jitiFactory(filePath, { interopDefault: true })(filePath);
    return { data: loaded?.default ?? loaded, filePath, format };
}
|
|
102
|
+
/**
 * Read and parse a YAML config file.
 *
 * The file path is passed to js-yaml as `filename` so that parse errors
 * identify which config file is malformed.
 *
 * @returns `{ data, filePath, format: "yaml" }`
 */
function loadYamlFile(filePath) {
    const raw = readFileSync(filePath, "utf-8");
    const data = load(raw, { filename: filePath });
    return { data, filePath, format: "yaml" };
}
|
|
107
|
+
/**
 * Read and parse a JSON config file.
 *
 * A leading UTF-8 byte-order mark is stripped before parsing, since
 * JSON.parse rejects it. When parsing fails, the thrown error is the
 * original one with the file path prefixed to its message.
 *
 * @returns `{ data, filePath, format: "json" }`
 * @throws The JSON.parse error (message prefixed with the file path), or any
 *   error from reading the file
 */
function loadJsonFile(filePath) {
    const raw = readFileSync(filePath, "utf-8");
    const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
    let data;
    try {
        data = JSON.parse(text);
    }
    catch (err) {
        if (err instanceof Error) {
            // Mutate in place so the error type and stack are preserved.
            err.message = `Failed to parse JSON config ${filePath}: ${err.message}`;
        }
        throw err;
    }
    return { data, filePath, format: "json" };
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fixture resolver — resolves fixture references into content for compilation.
|
|
3
|
+
*
|
|
4
|
+
* Handles document fixtures (fetched from Sanity or local files),
|
|
5
|
+
* file fixtures (read from disk), and inline fixtures (embedded in
|
|
6
|
+
* task definitions). Resolved content is injected into the TaskNode's
|
|
7
|
+
* VariableEnvelope for the compiler to use.
|
|
8
|
+
*
|
|
9
|
+
* Currently supports the existing fixture patterns:
|
|
10
|
+
* - `file://contexts/canonical/<id>.md` → read from local fs
|
|
11
|
+
* - Inline `vars.docs` strings → used as-is
|
|
12
|
+
* - Canonical doc references → resolved by DocFetcher port
|
|
13
|
+
*
|
|
14
|
+
* Future phases will add URI scheme resolution (repo://, sanity://, etc.)
|
|
15
|
+
* as described in the fixtures-artifacts design doc.
|
|
16
|
+
*
|
|
17
|
+
* @see docs/design-docs/architecture-overhaul/fixtures-artifacts.md
|
|
18
|
+
* @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
|
|
19
|
+
*/
|
|
20
|
+
import type { GeneralizedTaskDefinition, ResolvedFixture, VariableEnvelope } from "../../_vendor/ailf-core/index.d.ts";
|
|
21
|
+
/** Settings for fixture resolution. */
export interface FixtureResolverOptions {
    /** Root directory against which relative paths are resolved. */
    rootDir: string;
}
|
|
26
|
+
/** What resolving a task's fixtures produces. */
export interface FixtureResolutionResult {
    /** Resolved fixture content, keyed by fixture ID. */
    fixtures: Map<string, ResolvedFixture>;
    /** Variable envelope updated with the resolved doc content. */
    updatedVars: VariableEnvelope;
    /** Non-fatal issues encountered during resolution. */
    warnings: string[];
}
|
|
35
|
+
/**
 * Resolve fixtures for a task definition.
 *
 * Reads `vars.docs` and canonical doc references, resolves them to
 * content, and updates the variable envelope.
 *
 * @param task - Task definition whose fixtures are resolved
 * @param currentVars - Variable envelope to update
 * @param options - Resolver settings (root directory for relative paths)
 * @returns Resolved fixtures, the updated variable envelope, and any warnings
 */
export declare function resolveTaskFixtures(
    task: GeneralizedTaskDefinition,
    currentVars: VariableEnvelope,
    options: FixtureResolverOptions,
): FixtureResolutionResult;
|