@sanity/ailf 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +29 -12
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
- package/dist/_vendor/ailf-core/config-helpers.js +51 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
- package/dist/_vendor/ailf-core/examples/index.js +213 -94
- package/dist/_vendor/ailf-core/index.d.ts +3 -2
- package/dist/_vendor/ailf-core/index.js +2 -1
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +7 -1
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
- package/dist/adapters/task-sources/index.d.ts +3 -4
- package/dist/adapters/task-sources/index.js +3 -4
- package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
- package/dist/adapters/task-sources/repo-schemas.js +228 -20
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
- package/dist/adapters/task-sources/task-file-loader.js +21 -7
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +5 -4
- package/dist/commands/init.js +190 -25
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +15 -4
- package/dist/composition-root.js +100 -55
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +32 -19
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +77 -26
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +51 -31
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
- package/dist/pipeline/compiler/literacy-bridge.js +2 -2
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
- package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
- package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +16 -9
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +16 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
- package/dist/pipeline/mirror-repo-tasks.js +10 -10
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +68 -30
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +32 -24
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -9,12 +9,12 @@
|
|
|
9
9
|
* Ports & Adapters migration (Phase 0c). The original file is now a
|
|
10
10
|
* re-export barrel that preserves backward compatibility.
|
|
11
11
|
*/
|
|
12
|
-
import type { DocumentRef as _DocumentRef, EvalMode as _EvalMode } from "../../ailf-shared/index.d.ts";
|
|
12
|
+
import type { DocumentRef as _DocumentRef, EvalMode } from "../../ailf-shared/index.d.ts";
|
|
13
13
|
export type { ActualScoreEntry, ComponentResult, TestResult, UrlMetadata, } from "./scoring-input.js";
|
|
14
14
|
export type { DocumentRef } from "../../ailf-shared/index.d.ts";
|
|
15
15
|
export type { StoredBaseline, StoredReport, StoredRun, StoredTaskResult, StoredTrace, SchemaVersioned, } from "./storage-schema.js";
|
|
16
16
|
export { CURRENT_SCHEMA_VERSION, isSchemaVersioned, migrateDocument, } from "./storage-schema.js";
|
|
17
|
-
export type { AssertionRegistration, FixtureResolverRegistration, ModeRegistration, PluginManifest, PluginRegistry, PresetDefinition, ReportSinkRegistration, RubricTemplateRegistration, } from "./plugin-registry.js";
|
|
17
|
+
export type { AssertionRegistration, FixtureResolverRegistration, ModeBase, ModeRegistration, PluginManifest, PluginRegistry, PresetDefinition, ReportSinkRegistration, RubricTemplateRegistration, } from "./plugin-registry.js";
|
|
18
18
|
export { InMemoryPluginRegistry } from "./plugin-registry.js";
|
|
19
19
|
export type { AgentHarnessConfig, AgentHarnessModeConfig, CustomModeConfig, EvalModeConfig, EvalModeType, KnowledgeBaseRef, KnowledgeProbeModeConfig, LiteracyModeConfig, MCPServerConfig, MCPServerModeConfig, ProbeStrategy, SandboxConfig, ToolDef, } from "./eval-mode-config.js";
|
|
20
20
|
export { evalModeType } from "./eval-mode-config.js";
|
|
@@ -25,7 +25,6 @@ export type { ArtifactId, Brand, Err, FixtureId, IdValidationError, NewReportId,
|
|
|
25
25
|
export { err, fixtureId, ok, providerId, resultId, runId, suiteId, taskId, traceId, } from "./branded-ids.js";
|
|
26
26
|
export type { AgentHarnessTaskDefinition, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./generalized-task.js";
|
|
27
27
|
type DocumentRef = _DocumentRef;
|
|
28
|
-
type EvalMode = _EvalMode;
|
|
29
28
|
/** Aggregated retrieval metrics for a feature area */
|
|
30
29
|
export interface AreaRetrievalMetrics {
|
|
31
30
|
area: string;
|
|
@@ -119,7 +118,7 @@ export interface FailureModeReport {
|
|
|
119
118
|
totalJudgments: number;
|
|
120
119
|
}
|
|
121
120
|
/** Failure mode classification for a low-scoring judgment */
|
|
122
|
-
export type FailureModeType = "incorrect-docs" | "missing-docs" | "model-limitation" | "outdated-docs" | "poor-structure" | "unclassified";
|
|
121
|
+
export type FailureModeType = "api-error" | "incorrect-docs" | "missing-docs" | "model-limitation" | "outdated-docs" | "poor-structure" | "unclassified";
|
|
123
122
|
/** Per-feature-area score breakdown */
|
|
124
123
|
export interface FeatureScore {
|
|
125
124
|
/**
|
|
@@ -352,11 +351,40 @@ export interface ModelEntry {
|
|
|
352
351
|
env?: string;
|
|
353
352
|
id: string;
|
|
354
353
|
label: string;
|
|
355
|
-
|
|
354
|
+
/**
|
|
355
|
+
* Which evaluation modes this model participates in.
|
|
356
|
+
*
|
|
357
|
+
* Values must be canonical eval mode names (e.g., "literacy", "mcp-server").
|
|
358
|
+
* When omitted, the model participates in all modes.
|
|
359
|
+
*/
|
|
360
|
+
modes?: EvalMode[];
|
|
361
|
+
/**
|
|
362
|
+
* Per-provider timeout in ms. Emitted into Promptfoo provider config.
|
|
363
|
+
* Default: 300_000 (5 min, matching Promptfoo's built-in default).
|
|
364
|
+
*/
|
|
365
|
+
timeoutMs?: number;
|
|
366
|
+
/**
|
|
367
|
+
* Per-mode variant whitelist. Keys are eval mode IDs, values are arrays
|
|
368
|
+
* of variant IDs to include for that mode.
|
|
369
|
+
*
|
|
370
|
+
* When a model enrolls in a mode (via `modes`) but does not specify
|
|
371
|
+
* variants for it here, ALL variants defined by the mode base are included.
|
|
372
|
+
*
|
|
373
|
+
* Only meaningful for modes that define variants (e.g., literacy has
|
|
374
|
+
* "baseline", "observed", "agentic-naive", "agentic-optimized").
|
|
375
|
+
* Ignored for modes without variants.
|
|
376
|
+
*/
|
|
377
|
+
variants?: Partial<Record<EvalMode, string[]>>;
|
|
356
378
|
}
|
|
357
379
|
/** Parsed config/models.yaml structure */
|
|
358
380
|
export interface ModelsConfig {
|
|
359
381
|
defaults: Record<string, unknown>;
|
|
382
|
+
/**
|
|
383
|
+
* Maximum wall-clock time per eval step (all tests for one mode) in ms.
|
|
384
|
+
* When exceeded, the subprocess is killed and partial results are used.
|
|
385
|
+
* Default: no limit (backward compatible).
|
|
386
|
+
*/
|
|
387
|
+
evalBudgetMs?: number;
|
|
360
388
|
grader: {
|
|
361
389
|
id: string;
|
|
362
390
|
label?: string;
|
|
@@ -507,6 +535,21 @@ export interface TestSummary {
|
|
|
507
535
|
task: string;
|
|
508
536
|
error: string;
|
|
509
537
|
}[];
|
|
538
|
+
/** Per-test timing statistics (when latencyMs is available from Promptfoo) */
|
|
539
|
+
timing?: {
|
|
540
|
+
/** Median test duration in ms */
|
|
541
|
+
medianMs: number;
|
|
542
|
+
/** 95th percentile test duration in ms */
|
|
543
|
+
p95Ms: number;
|
|
544
|
+
/** Maximum test duration in ms */
|
|
545
|
+
maxMs: number;
|
|
546
|
+
/** Tests that exceeded the slow threshold (2x median, min 60s) */
|
|
547
|
+
slowTests: {
|
|
548
|
+
task: string;
|
|
549
|
+
model: string;
|
|
550
|
+
durationMs: number;
|
|
551
|
+
}[];
|
|
552
|
+
};
|
|
510
553
|
}
|
|
511
554
|
/** Token usage and estimated cost for a pipeline run. */
|
|
512
555
|
export interface PipelineUsage {
|
|
@@ -575,8 +618,6 @@ export interface ProductFeature {
|
|
|
575
618
|
sections: string[];
|
|
576
619
|
/** Coverage status */
|
|
577
620
|
status: "covered" | "out-of-scope" | "planned" | "uncovered";
|
|
578
|
-
/** Number of evaluation tasks (if covered) */
|
|
579
|
-
taskCount?: number;
|
|
580
621
|
}
|
|
581
622
|
/** Full classification of a content release for evaluation */
|
|
582
623
|
export interface ReleaseClassification {
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Plugin registry — typed extension points for AILF evaluation capabilities.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
4
|
+
* Three-tier architecture:
|
|
5
|
+
* - **Mode bases** define evaluation methodology (rubrics, scoring, prompts)
|
|
6
|
+
* - **Domain presets** target a mode base and add domain config (sources,
|
|
7
|
+
* features, doc fetcher)
|
|
8
|
+
* - **Framework assertions** are generic evaluation primitives available to
|
|
9
|
+
* all modes
|
|
10
10
|
*
|
|
11
11
|
* @see docs/design-docs/architecture-overhaul/extensibility-plugins.md
|
|
12
12
|
*/
|
|
@@ -14,6 +14,25 @@ import type { PromptTemplate } from "../ports/mode-handler.js";
|
|
|
14
14
|
import type { DocFetcher } from "../ports/doc-fetcher.js";
|
|
15
15
|
import type { SourceEntry } from "../config-helpers.js";
|
|
16
16
|
import type { FeatureRegistry } from "../schemas/pipeline.js";
|
|
17
|
+
/**
|
|
18
|
+
* A named variant within an evaluation mode.
|
|
19
|
+
*
|
|
20
|
+
* Modes can declare variants to represent different evaluation strategies
|
|
21
|
+
* that share the same methodology. For example, the literacy mode has
|
|
22
|
+
* "baseline", "observed", and "agentic-*" variants.
|
|
23
|
+
*
|
|
24
|
+
* Models opt into specific variants via `ModelEntry.variants`. When a model
|
|
25
|
+
* enrolls in a mode without specifying variants, all defined variants are
|
|
26
|
+
* included by default.
|
|
27
|
+
*/
|
|
28
|
+
export interface ModeVariantDefinition {
|
|
29
|
+
/** Variant identifier (e.g., "baseline", "agentic-naive") */
|
|
30
|
+
id: string;
|
|
31
|
+
/** Human-readable label (e.g., "Standard (baseline)") */
|
|
32
|
+
label: string;
|
|
33
|
+
/** Optional description for docs/CLI help */
|
|
34
|
+
description?: string;
|
|
35
|
+
}
|
|
17
36
|
/** A registered evaluation mode handler */
|
|
18
37
|
export interface ModeRegistration {
|
|
19
38
|
/** Unique mode identifier (e.g., "api-contract") */
|
|
@@ -26,6 +45,14 @@ export interface ModeRegistration {
|
|
|
26
45
|
rubricTemplateIds: string[];
|
|
27
46
|
/** Compile function module path (loaded at runtime) */
|
|
28
47
|
handlerModule: string;
|
|
48
|
+
/**
|
|
49
|
+
* Variants this mode supports. Omit, or leave empty, for modes without variants.
|
|
50
|
+
*
|
|
51
|
+
* When defined, models can selectively opt into specific variants via
|
|
52
|
+
* `ModelEntry.variants`. Models that enroll in the mode without specifying
|
|
53
|
+
* variants participate in all defined variants.
|
|
54
|
+
*/
|
|
55
|
+
variants?: ModeVariantDefinition[];
|
|
29
56
|
}
|
|
30
57
|
/** A registered assertion type */
|
|
31
58
|
export interface AssertionRegistration {
|
|
@@ -33,8 +60,12 @@ export interface AssertionRegistration {
|
|
|
33
60
|
type: string;
|
|
34
61
|
/** Human-readable label */
|
|
35
62
|
label: string;
|
|
36
|
-
/**
|
|
37
|
-
|
|
63
|
+
/**
|
|
64
|
+
* Which modes this assertion is compatible with.
|
|
65
|
+
* When omitted, the assertion is compatible with all modes.
|
|
66
|
+
* When specified, acts as a whitelist of mode IDs.
|
|
67
|
+
*/
|
|
68
|
+
compatibleModes?: string[];
|
|
38
69
|
/** Assertion handler module path */
|
|
39
70
|
handlerModule: string;
|
|
40
71
|
}
|
|
@@ -65,6 +96,30 @@ export interface ReportSinkRegistration {
|
|
|
65
96
|
/** Sink module path */
|
|
66
97
|
handlerModule: string;
|
|
67
98
|
}
|
|
99
|
+
/**
|
|
100
|
+
* ModeBase — shared evaluation methodology for a mode.
|
|
101
|
+
*
|
|
102
|
+
* Defines HOW you evaluate (rubrics, scoring, prompts) independently of
|
|
103
|
+
* WHAT you're evaluating (sources, features, docs). Multiple domain presets
|
|
104
|
+
* can target the same mode base and inherit its defaults.
|
|
105
|
+
*
|
|
106
|
+
* Example: the "literacy" mode base defines rubric templates for
|
|
107
|
+
* task-completion, code-correctness, and doc-coverage. Both a Sanity docs
|
|
108
|
+
* preset and an external docs preset can target "literacy" and inherit
|
|
109
|
+
* these rubrics without redefining them.
|
|
110
|
+
*/
|
|
111
|
+
export interface ModeBase {
|
|
112
|
+
/** The mode registration (handler, provider patterns, rubric template IDs) */
|
|
113
|
+
mode: ModeRegistration;
|
|
114
|
+
/** Default rubric templates for this mode */
|
|
115
|
+
rubricTemplates?: RubricTemplateRegistration[];
|
|
116
|
+
/** Default scoring profiles for this mode (profile name → dimension weights) */
|
|
117
|
+
scoringProfiles?: Record<string, Record<string, number>>;
|
|
118
|
+
/** Default prompt templates for this mode (template name → template) */
|
|
119
|
+
promptTemplates?: Record<string, PromptTemplate>;
|
|
120
|
+
/** Mode-specific assertion types (beyond framework builtins) */
|
|
121
|
+
assertions?: AssertionRegistration[];
|
|
122
|
+
}
|
|
68
123
|
/** Plugin manifest describing a single plugin */
|
|
69
124
|
export interface PluginManifest {
|
|
70
125
|
/** Plugin name (npm package style) */
|
|
@@ -80,32 +135,49 @@ export interface PluginManifest {
|
|
|
80
135
|
/** Dependencies on other plugins */
|
|
81
136
|
requires?: string[];
|
|
82
137
|
}
|
|
83
|
-
/**
|
|
138
|
+
/**
|
|
139
|
+
* A domain preset targets a mode base and adds domain-specific configuration.
|
|
140
|
+
*
|
|
141
|
+
* The preset inherits evaluation methodology (rubrics, scoring, prompts) from
|
|
142
|
+
* its mode base. It can optionally override any inherited values.
|
|
143
|
+
*/
|
|
84
144
|
export interface PresetDefinition {
|
|
85
|
-
/** Preset name */
|
|
145
|
+
/** Preset name (unique identifier) */
|
|
86
146
|
name: string;
|
|
87
147
|
/** Plugin manifest */
|
|
88
148
|
manifest: PluginManifest;
|
|
89
|
-
/**
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
149
|
+
/**
|
|
150
|
+
* Lifecycle status — mirrors task status semantics.
|
|
151
|
+
* active: registered and used in evaluations (default)
|
|
152
|
+
* draft: registered but skipped unless explicitly targeted
|
|
153
|
+
* paused: registered but skipped (can be resumed)
|
|
154
|
+
* archived: not registered
|
|
155
|
+
*/
|
|
156
|
+
status?: "active" | "archived" | "draft" | "paused";
|
|
157
|
+
/**
|
|
158
|
+
* Which mode this preset targets (by mode ID).
|
|
159
|
+
* Links to a registered ModeBase. The preset inherits rubrics,
|
|
160
|
+
* scoring profiles, and prompt templates from the base.
|
|
161
|
+
*/
|
|
162
|
+
mode: string;
|
|
163
|
+
/** Fixture resolvers */
|
|
96
164
|
fixtureResolvers?: FixtureResolverRegistration[];
|
|
97
|
-
/** Report sinks
|
|
165
|
+
/** Report sinks */
|
|
98
166
|
reportSinks?: ReportSinkRegistration[];
|
|
99
|
-
/** Prompt templates keyed by template name (e.g. "with-docs", "agentic") */
|
|
100
|
-
promptTemplates?: Record<string, PromptTemplate>;
|
|
101
|
-
/** Scoring profiles mapping profile name to dimension-weight pairs */
|
|
102
|
-
scoringProfiles?: Record<string, Record<string, number>>;
|
|
103
167
|
/** Factory function that creates a DocFetcher instance */
|
|
104
168
|
docFetcher?: () => DocFetcher;
|
|
105
169
|
/** Documentation source definitions (production, branch, local, etc.) */
|
|
106
170
|
sourceDefs?: SourceEntry[];
|
|
107
171
|
/** Product feature registry for coverage tracking */
|
|
108
172
|
featureDefs?: FeatureRegistry;
|
|
173
|
+
/** Override rubric templates (merged by ID with mode base) */
|
|
174
|
+
rubricTemplates?: RubricTemplateRegistration[];
|
|
175
|
+
/** Override scoring profiles (merged by name with mode base) */
|
|
176
|
+
scoringProfiles?: Record<string, Record<string, number>>;
|
|
177
|
+
/** Override prompt templates (merged by name with mode base) */
|
|
178
|
+
promptTemplates?: Record<string, PromptTemplate>;
|
|
179
|
+
/** Additional mode-specific assertions */
|
|
180
|
+
assertions?: AssertionRegistration[];
|
|
109
181
|
}
|
|
110
182
|
/**
|
|
111
183
|
* PluginRegistry — central registry for all AILF extensions.
|
|
@@ -154,10 +226,16 @@ export interface PluginRegistry {
|
|
|
154
226
|
registerSourceDefs(sources: SourceEntry[]): void;
|
|
155
227
|
/** Get all registered source definitions */
|
|
156
228
|
getSourceDefs(): SourceEntry[];
|
|
157
|
-
/** Register a feature registry (
|
|
229
|
+
/** Register a feature registry (merged by feature ID with existing) */
|
|
158
230
|
registerFeatureDefs(features: FeatureRegistry): void;
|
|
159
231
|
/** Get the registered feature registry, if any */
|
|
160
232
|
getFeatureDefs(): FeatureRegistry | undefined;
|
|
233
|
+
/** Register a mode base (evaluation methodology) */
|
|
234
|
+
registerModeBase(base: ModeBase): void;
|
|
235
|
+
/** Get a mode base by mode ID */
|
|
236
|
+
getModeBase(modeId: string): ModeBase | undefined;
|
|
237
|
+
/** Get all registered mode bases */
|
|
238
|
+
getModeBases(): ModeBase[];
|
|
161
239
|
/** Get all registered presets */
|
|
162
240
|
getPresets(): PresetDefinition[];
|
|
163
241
|
}
|
|
@@ -170,6 +248,7 @@ export declare class InMemoryPluginRegistry implements PluginRegistry {
|
|
|
170
248
|
private readonly rubricTemplates_;
|
|
171
249
|
private readonly fixtureResolvers_;
|
|
172
250
|
private readonly reportSinks_;
|
|
251
|
+
private readonly modeBases_;
|
|
173
252
|
private readonly presets_;
|
|
174
253
|
private promptTemplates_;
|
|
175
254
|
private scoringProfiles_;
|
|
@@ -199,4 +278,7 @@ export declare class InMemoryPluginRegistry implements PluginRegistry {
|
|
|
199
278
|
getSourceDefs(): SourceEntry[];
|
|
200
279
|
registerFeatureDefs(features: FeatureRegistry): void;
|
|
201
280
|
getFeatureDefs(): FeatureRegistry | undefined;
|
|
281
|
+
registerModeBase(base: ModeBase): void;
|
|
282
|
+
getModeBase(modeId: string): ModeBase | undefined;
|
|
283
|
+
getModeBases(): ModeBase[];
|
|
202
284
|
}
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Plugin registry — typed extension points for AILF evaluation capabilities.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
4
|
+
* Three-tier architecture:
|
|
5
|
+
* - **Mode bases** define evaluation methodology (rubrics, scoring, prompts)
|
|
6
|
+
* - **Domain presets** target a mode base and add domain config (sources,
|
|
7
|
+
* features, doc fetcher)
|
|
8
|
+
* - **Framework assertions** are generic evaluation primitives available to
|
|
9
|
+
* all modes
|
|
10
10
|
*
|
|
11
11
|
* @see docs/design-docs/architecture-overhaul/extensibility-plugins.md
|
|
12
12
|
*/
|
|
@@ -19,6 +19,7 @@ export class InMemoryPluginRegistry {
|
|
|
19
19
|
rubricTemplates_ = new Map();
|
|
20
20
|
fixtureResolvers_ = new Map();
|
|
21
21
|
reportSinks_ = new Map();
|
|
22
|
+
modeBases_ = new Map();
|
|
22
23
|
presets_ = new Map();
|
|
23
24
|
promptTemplates_ = {};
|
|
24
25
|
scoringProfiles_ = {};
|
|
@@ -26,19 +27,56 @@ export class InMemoryPluginRegistry {
|
|
|
26
27
|
sourceDefs_ = [];
|
|
27
28
|
featureDefs_;
|
|
28
29
|
registerPreset(preset) {
|
|
30
|
+
// Skip archived presets entirely
|
|
31
|
+
if (preset.status === "archived")
|
|
32
|
+
return;
|
|
33
|
+
// Store draft/paused presets in the map (for later activation via
|
|
34
|
+
// --preset flag) but skip all side-effect registrations. This prevents
|
|
35
|
+
// a draft preset from silently overwriting the doc fetcher, merging
|
|
36
|
+
// scoring profiles, etc.
|
|
29
37
|
this.presets_.set(preset.name, preset);
|
|
30
|
-
if (preset.
|
|
31
|
-
|
|
32
|
-
|
|
38
|
+
if (preset.status === "draft" || preset.status === "paused")
|
|
39
|
+
return;
|
|
40
|
+
// Resolve mode base defaults
|
|
41
|
+
const base = this.modeBases_.get(preset.mode);
|
|
42
|
+
if (!base) {
|
|
43
|
+
throw new Error(`Preset "${preset.name}" targets mode "${preset.mode}" ` +
|
|
44
|
+
`but no mode base is registered for it. ` +
|
|
45
|
+
`Available mode bases: ${[...this.modeBases_.keys()].join(", ") || "(none)"}`);
|
|
46
|
+
}
|
|
47
|
+
// Mode is already registered by registerModeBase() — no need to re-register.
|
|
48
|
+
// Merge rubric templates: base defaults + preset overrides (by ID)
|
|
49
|
+
const baseRubrics = new Map((base.rubricTemplates ?? []).map((r) => [r.id, r]));
|
|
50
|
+
for (const r of preset.rubricTemplates ?? []) {
|
|
51
|
+
baseRubrics.set(r.id, r);
|
|
52
|
+
}
|
|
53
|
+
for (const r of baseRubrics.values()) {
|
|
54
|
+
this.registerRubricTemplate(r);
|
|
55
|
+
}
|
|
56
|
+
// Merge scoring profiles: base defaults + preset overrides (by name)
|
|
57
|
+
const profiles = {
|
|
58
|
+
...base.scoringProfiles,
|
|
59
|
+
...preset.scoringProfiles,
|
|
60
|
+
};
|
|
61
|
+
if (Object.keys(profiles).length > 0) {
|
|
62
|
+
this.registerScoringProfiles(profiles);
|
|
33
63
|
}
|
|
64
|
+
// Merge prompt templates: base defaults + preset overrides (by name)
|
|
65
|
+
const prompts = {
|
|
66
|
+
...base.promptTemplates,
|
|
67
|
+
...preset.promptTemplates,
|
|
68
|
+
};
|
|
69
|
+
if (Object.keys(prompts).length > 0) {
|
|
70
|
+
this.registerPromptTemplates(prompts);
|
|
71
|
+
}
|
|
72
|
+
// Merge assertions: base + preset (preset overrides by type)
|
|
73
|
+
for (const a of base.assertions ?? [])
|
|
74
|
+
this.registerAssertion(a);
|
|
34
75
|
if (preset.assertions) {
|
|
35
76
|
for (const a of preset.assertions)
|
|
36
77
|
this.registerAssertion(a);
|
|
37
78
|
}
|
|
38
|
-
|
|
39
|
-
for (const t of preset.rubricTemplates)
|
|
40
|
-
this.registerRubricTemplate(t);
|
|
41
|
-
}
|
|
79
|
+
// Register domain-specific fields
|
|
42
80
|
if (preset.fixtureResolvers) {
|
|
43
81
|
for (const r of preset.fixtureResolvers)
|
|
44
82
|
this.registerFixtureResolver(r);
|
|
@@ -47,12 +85,6 @@ export class InMemoryPluginRegistry {
|
|
|
47
85
|
for (const s of preset.reportSinks)
|
|
48
86
|
this.registerReportSink(s);
|
|
49
87
|
}
|
|
50
|
-
if (preset.promptTemplates) {
|
|
51
|
-
this.registerPromptTemplates(preset.promptTemplates);
|
|
52
|
-
}
|
|
53
|
-
if (preset.scoringProfiles) {
|
|
54
|
-
this.registerScoringProfiles(preset.scoringProfiles);
|
|
55
|
-
}
|
|
56
88
|
if (preset.docFetcher) {
|
|
57
89
|
this.registerDocFetcherFactory(preset.docFetcher);
|
|
58
90
|
}
|
|
@@ -124,9 +156,30 @@ export class InMemoryPluginRegistry {
|
|
|
124
156
|
return this.sourceDefs_;
|
|
125
157
|
}
|
|
126
158
|
registerFeatureDefs(features) {
|
|
127
|
-
this.featureDefs_
|
|
159
|
+
if (!this.featureDefs_) {
|
|
160
|
+
this.featureDefs_ = features;
|
|
161
|
+
return;
|
|
162
|
+
}
|
|
163
|
+
// Merge by feature ID: new features override existing on ID collision,
|
|
164
|
+
// existing features not in new set are preserved.
|
|
165
|
+
const merged = new Map(this.featureDefs_.features.map((f) => [f.id, f]));
|
|
166
|
+
for (const f of features.features) {
|
|
167
|
+
merged.set(f.id, f);
|
|
168
|
+
}
|
|
169
|
+
this.featureDefs_ = { features: [...merged.values()] };
|
|
128
170
|
}
|
|
129
171
|
getFeatureDefs() {
|
|
130
172
|
return this.featureDefs_;
|
|
131
173
|
}
|
|
174
|
+
registerModeBase(base) {
|
|
175
|
+
this.modeBases_.set(base.mode.id, base);
|
|
176
|
+
// Also register the mode itself so getMode() works
|
|
177
|
+
this.registerMode(base.mode);
|
|
178
|
+
}
|
|
179
|
+
getModeBase(modeId) {
|
|
180
|
+
return this.modeBases_.get(modeId);
|
|
181
|
+
}
|
|
182
|
+
getModeBases() {
|
|
183
|
+
return [...this.modeBases_.values()];
|
|
184
|
+
}
|
|
132
185
|
}
|
|
@@ -37,6 +37,21 @@ export type RawEvalMode = EvalMode | "agentic" | "baseline" | "full" | "observed
|
|
|
37
37
|
export declare const CANONICAL_EVAL_MODES: readonly ["literacy", "mcp-server", "agent-harness", "knowledge-probe", "custom"];
|
|
38
38
|
/** Legacy CLI aliases that map to `mode: "literacy"` + variant. */
|
|
39
39
|
export declare const LEGACY_EVAL_MODE_ALIASES: readonly ["baseline", "agentic", "observed", "full"];
|
|
40
|
+
/**
|
|
41
|
+
* Literacy mode variant names — each is a distinct evaluation strategy.
|
|
42
|
+
*
|
|
43
|
+
* These are the valid values for the `variant` field in PipelineRequest
|
|
44
|
+
* when `mode` is `"literacy"`. They match LEGACY_EVAL_MODE_ALIASES because
|
|
45
|
+
* variants were originally exposed as top-level mode names.
|
|
46
|
+
*
|
|
47
|
+
* - `baseline` — with-docs / without-docs comparison (gold + floor)
|
|
48
|
+
* - `agentic` — model uses tools to find docs (gold only)
|
|
49
|
+
* - `observed` — HTTP-instrumented behavior observation
|
|
50
|
+
* - `full` — combined baseline + agentic
|
|
51
|
+
*/
|
|
52
|
+
export declare const LITERACY_VARIANTS: readonly ["baseline", "agentic", "observed", "full"];
|
|
53
|
+
/** Union of all literacy variant string values. */
|
|
54
|
+
export type LiteracyVariant = (typeof LITERACY_VARIANTS)[number];
|
|
40
55
|
/**
|
|
41
56
|
* All accepted mode names for Zod enum construction.
|
|
42
57
|
* Canonical modes first, then legacy aliases.
|
|
@@ -22,6 +22,24 @@ export const LEGACY_EVAL_MODE_ALIASES = [
|
|
|
22
22
|
"observed",
|
|
23
23
|
"full",
|
|
24
24
|
];
|
|
25
|
+
/**
|
|
26
|
+
* Literacy mode variant names — each is a distinct evaluation strategy.
|
|
27
|
+
*
|
|
28
|
+
* These are the valid values for the `variant` field in PipelineRequest
|
|
29
|
+
* when `mode` is `"literacy"`. They match LEGACY_EVAL_MODE_ALIASES because
|
|
30
|
+
* variants were originally exposed as top-level mode names.
|
|
31
|
+
*
|
|
32
|
+
* - `baseline` — with-docs / without-docs comparison (gold + floor)
|
|
33
|
+
* - `agentic` — model uses tools to find docs (gold only)
|
|
34
|
+
* - `observed` — HTTP-instrumented behavior observation
|
|
35
|
+
* - `full` — combined baseline + agentic
|
|
36
|
+
*/
|
|
37
|
+
export const LITERACY_VARIANTS = [
|
|
38
|
+
"baseline",
|
|
39
|
+
"agentic",
|
|
40
|
+
"observed",
|
|
41
|
+
"full",
|
|
42
|
+
];
|
|
25
43
|
/**
|
|
26
44
|
* All accepted mode names for Zod enum construction.
|
|
27
45
|
* Canonical modes first, then legacy aliases.
|
|
@@ -17,7 +17,7 @@ const HINTS = [
|
|
|
17
17
|
/no article found for slug/i.test(e.message),
|
|
18
18
|
hint: "One or more `canonicalDocs` slugs in your task definitions don't match " +
|
|
19
19
|
"any article in the documentation. Check the `slug` values in " +
|
|
20
|
-
"`.ailf/tasks
|
|
20
|
+
"`.ailf/tasks/` and ensure they correspond to real articles.\n" +
|
|
21
21
|
" Run `ailf validate` to check your task definitions locally.",
|
|
22
22
|
},
|
|
23
23
|
{
|
|
@@ -51,7 +51,7 @@ const HINTS = [
|
|
|
51
51
|
hint: "The documentation fetch step completed but one or more tasks had " +
|
|
52
52
|
"empty context. This usually means a `canonicalDocs` slug doesn't " +
|
|
53
53
|
"match any article.\n" +
|
|
54
|
-
" Check the slug values in `.ailf/tasks
|
|
54
|
+
" Check the slug values in `.ailf/tasks/`.",
|
|
55
55
|
},
|
|
56
56
|
{
|
|
57
57
|
match: (e) => e.step === "dispatch" && /dispatch failed/i.test(e.message),
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
* @see docs/design-docs/architecture-overhaul/typescript-configuration.md
|
|
23
23
|
*/
|
|
24
24
|
import { readFileSync } from "fs";
|
|
25
|
-
import { extname } from "path";
|
|
25
|
+
import { extname, resolve } from "path";
|
|
26
26
|
import { EvalConfigSchema, PipelineRequestSchema, } from "../../_vendor/ailf-core/index.js";
|
|
27
27
|
import { mapRequestToConfig } from "../../pipeline/map-request-to-config.js";
|
|
28
28
|
import { normalizeMode } from "../../pipeline/normalize-mode.js";
|
|
@@ -93,6 +93,7 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
|
|
|
93
93
|
const normalized = normalizeMode(config.mode ?? "literacy");
|
|
94
94
|
return {
|
|
95
95
|
rootDir,
|
|
96
|
+
outputDir: resolve(rootDir, "results", "latest"),
|
|
96
97
|
mode: normalized.mode,
|
|
97
98
|
variant: normalized.variant,
|
|
98
99
|
noAutoScope: config.noAutoScope ?? false,
|
|
@@ -119,7 +120,12 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
|
|
|
119
120
|
allowedOrigins: config.allowedOrigins,
|
|
120
121
|
searchMode: config.searchMode ?? "open",
|
|
121
122
|
concurrency: config.concurrency,
|
|
123
|
+
captureEnabled: false,
|
|
124
|
+
captureDir: undefined,
|
|
125
|
+
captureCompress: true,
|
|
126
|
+
captureExtras: true,
|
|
122
127
|
remote: false,
|
|
123
128
|
apiUrl: "https://ailf-api.sanity.build",
|
|
129
|
+
presets: config.presets,
|
|
124
130
|
};
|
|
125
131
|
}
|
|
@@ -13,21 +13,29 @@
|
|
|
13
13
|
* @see docs/design-docs/architecture-overhaul/typescript-configuration.md
|
|
14
14
|
*/
|
|
15
15
|
import { existsSync } from "fs";
|
|
16
|
+
import { pathToFileURL } from "node:url";
|
|
16
17
|
import { createJiti } from "jiti";
|
|
17
18
|
// ---------------------------------------------------------------------------
|
|
18
|
-
//
|
|
19
|
+
// jiti instance factory — resolves imports relative to the loaded file
|
|
19
20
|
// ---------------------------------------------------------------------------
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
21
|
+
/**
|
|
22
|
+
* Create a jiti instance that resolves bare-specifier imports relative to
|
|
23
|
+
* the given file path, not relative to this loader module.
|
|
24
|
+
*
|
|
25
|
+
* This is critical for pnpm workspaces: a task file at `.ailf/tasks/foo.task.ts`
|
|
26
|
+
* importing `@sanity/ailf` must resolve through the dependency graph
|
|
27
|
+
* visible from the task file's directory, not from deep inside packages/eval/.
|
|
28
|
+
*
|
|
29
|
+
* We pass a `file://` URL (not a bare path) so jiti uses ESM resolution,
|
|
30
|
+
* which matches the `"import"` condition in package.json exports maps.
|
|
31
|
+
*/
|
|
32
|
+
function createJitiForFile(filePath) {
|
|
33
|
+
return createJiti(pathToFileURL(filePath).href, {
|
|
34
|
+
// Interop: handle both `export default` and `module.exports`
|
|
35
|
+
interopDefault: true,
|
|
36
|
+
// Keep jiti's runtime module cache enabled (already-evaluated modules are reused)
|
|
37
|
+
requireCache: true,
|
|
38
|
+
});
|
|
31
39
|
}
|
|
32
40
|
/**
|
|
33
41
|
* Load a TypeScript or JavaScript config file and return its default export.
|
|
@@ -43,7 +51,7 @@ export async function loadTsConfig(filePath) {
|
|
|
43
51
|
return { ok: false, error: `File not found: ${filePath}`, path: filePath };
|
|
44
52
|
}
|
|
45
53
|
try {
|
|
46
|
-
const jiti =
|
|
54
|
+
const jiti = createJitiForFile(filePath);
|
|
47
55
|
const mod = await jiti.import(filePath);
|
|
48
56
|
const value = extractDefault(mod);
|
|
49
57
|
if (value === undefined || value === null) {
|
|
@@ -33,6 +33,7 @@ export class PromptfooEvalAdapter {
|
|
|
33
33
|
cwd: this.rootDir,
|
|
34
34
|
env: { ...process.env, ...config.env },
|
|
35
35
|
stdio: "inherit",
|
|
36
|
+
...(config.maxDurationMs ? { timeout: config.maxDurationMs } : {}),
|
|
36
37
|
});
|
|
37
38
|
return {
|
|
38
39
|
durationMs: Date.now() - start,
|
|
@@ -40,10 +41,15 @@ export class PromptfooEvalAdapter {
|
|
|
40
41
|
summary: `Evaluation complete (${config.configPath})`,
|
|
41
42
|
};
|
|
42
43
|
}
|
|
43
|
-
catch {
|
|
44
|
+
catch (err) {
|
|
45
|
+
const isTimeout = err instanceof Error &&
|
|
46
|
+
"killed" in err &&
|
|
47
|
+
err.killed === true;
|
|
44
48
|
return {
|
|
45
49
|
durationMs: Date.now() - start,
|
|
46
|
-
error:
|
|
50
|
+
error: isTimeout
|
|
51
|
+
? `Eval subprocess killed after ${config.maxDurationMs}ms time budget`
|
|
52
|
+
: `Promptfoo evaluation failed: ${config.configPath}`,
|
|
47
53
|
status: "failed",
|
|
48
54
|
};
|
|
49
55
|
}
|
package/dist/adapters/index.d.ts
CHANGED
|
@@ -9,4 +9,3 @@ export { SanityDocFetcher } from "./doc-fetchers/index.js";
|
|
|
9
9
|
export { PromptfooEvalAdapter } from "./eval-runners/index.js";
|
|
10
10
|
export { ConsoleLogger, type ConsoleLoggerOptions, JsonLogger, QuietLogger, } from "./loggers/index.js";
|
|
11
11
|
export { CliConfigAdapter, FileConfigAdapter } from "./config-sources/index.js";
|
|
12
|
-
export { YamlTaskSource } from "./task-sources/index.js";
|
package/dist/adapters/index.js
CHANGED
|
@@ -9,4 +9,3 @@ export { SanityDocFetcher } from "./doc-fetchers/index.js";
|
|
|
9
9
|
export { PromptfooEvalAdapter } from "./eval-runners/index.js";
|
|
10
10
|
export { ConsoleLogger, JsonLogger, QuietLogger, } from "./loggers/index.js";
|
|
11
11
|
export { CliConfigAdapter, FileConfigAdapter } from "./config-sources/index.js";
|
|
12
|
-
export { YamlTaskSource } from "./task-sources/index.js";
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* tasks in a single GeneralizedTaskDefinition[].
|
|
11
11
|
*
|
|
12
12
|
* @see packages/core/src/ports/task-source.ts — TaskSource port
|
|
13
|
-
* @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
13
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
14
14
|
*/
|
|
15
15
|
import type { FilterOptions, GeneralizedTaskDefinition, TaskSource } from "../../_vendor/ailf-core/index.d.ts";
|
|
16
16
|
export declare class CompositeTaskSource implements TaskSource {
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* tasks in a single GeneralizedTaskDefinition[].
|
|
11
11
|
*
|
|
12
12
|
* @see packages/core/src/ports/task-source.ts — TaskSource port
|
|
13
|
-
* @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
13
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
14
14
|
*/
|
|
15
15
|
export class CompositeTaskSource {
|
|
16
16
|
sources;
|