@sanity/ailf 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +29 -12
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
- package/dist/_vendor/ailf-core/config-helpers.js +51 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
- package/dist/_vendor/ailf-core/examples/index.js +213 -94
- package/dist/_vendor/ailf-core/index.d.ts +3 -2
- package/dist/_vendor/ailf-core/index.js +2 -1
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +7 -1
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
- package/dist/adapters/task-sources/index.d.ts +3 -4
- package/dist/adapters/task-sources/index.js +3 -4
- package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
- package/dist/adapters/task-sources/repo-schemas.js +228 -20
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
- package/dist/adapters/task-sources/task-file-loader.js +21 -7
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +5 -4
- package/dist/commands/init.js +190 -25
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +15 -4
- package/dist/composition-root.js +100 -55
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +32 -19
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +77 -26
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +51 -31
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
- package/dist/pipeline/compiler/literacy-bridge.js +2 -2
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
- package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
- package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +16 -9
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +16 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
- package/dist/pipeline/mirror-repo-tasks.js +10 -10
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +68 -30
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +32 -24
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core compilation logic for knowledge probe tasks.
|
|
3
|
+
*
|
|
4
|
+
* Compiles a knowledge probe task definition into Promptfoo configuration.
|
|
5
|
+
* This is intentionally minimal — knowledge probes map almost 1:1 to
|
|
6
|
+
* basic Promptfoo test cases. The AILF value-add is type-safe authoring,
|
|
7
|
+
* cross-model comparison, and score normalization.
|
|
8
|
+
*/
|
|
9
|
+
import { mapKnowledgeProbeAssertion } from "./assertions.js";
|
|
10
|
+
import { validateKnowledgeProbeTask } from "./validation.js";
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
// Public compilation entry point
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
/**
 * Translate a single knowledge probe task into the parts of a Promptfoo
 * configuration: providers, prompts, test cases, and mode metadata.
 *
 * Validation problems never abort compilation. Each one is converted into a
 * human-readable warning string and returned with the compiled output, so the
 * caller decides how to surface it.
 *
 * @param task - Knowledge probe task definition to compile.
 * @param options - Compile options; forwarded to provider and test-case
 *   assembly.
 * @returns An object with `providers`, `tests`, `prompts`, `metadata`, and the
 *   accumulated `warnings`.
 */
export function compileKnowledgeProbeTask(task, options) {
    // One shared list: validation fills it first, test-case assembly may append.
    const warnings = [];
    for (const issue of validateKnowledgeProbeTask(task)) {
        warnings.push(`Knowledge probe "${task.id}": ${issue.field} — ${issue.message}`);
    }
    // Providers come straight from the configured model list (possibly empty).
    const providers = buildProviders(options);
    // A knowledge probe has exactly one prompt, and it carries no docs.
    const prompts = buildPrompts(task);
    const tests = buildTestCases(task, options, warnings);
    return {
        providers,
        tests,
        prompts,
        // Mode metadata: doc context and retrieval metrics are always off here.
        metadata: {
            mode: "knowledge-probe",
            probeStrategy: task.probeStrategy ?? "breadth-first",
            noDocContext: true,
            retrievalMetrics: false,
        },
        warnings,
    };
}
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
// Provider assembly
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
/**
 * Project the configured models onto Promptfoo provider entries.
 *
 * @param options - Compile options; only `options.models` is read.
 * @returns One `{ id, label, config }` entry per model, or an empty array
 *   when no models were supplied (the caller is expected to provide them).
 */
function buildProviders(options) {
    const models = options?.models;
    if (!models || !(models.length > 0)) {
        return [];
    }
    // Copy only the three provider fields; anything else on the model is dropped.
    return models.map(({ id, label, config }) => ({ id, label, config }));
}
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
// Prompt assembly
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
/**
 * Build the single Promptfoo prompt for a knowledge probe.
 *
 * The question is taken from the first defined source among `prompt.text`,
 * `prompt.vars.task`, `description`, and finally a placeholder built from the
 * title. When a system message is present the raw prompt is laid out as a
 * `[system]` section followed by a `[user]` section.
 *
 * @param task - Knowledge probe task definition.
 * @returns A one-element array holding the `knowledge-probe` prompt.
 */
function buildPrompts(task) {
    const question = String(
        task.prompt?.text ??
            task.prompt?.vars?.task ??
            task.description ??
            `Knowledge probe: ${task.title}`,
    );
    const systemMessage = task.prompt?.systemMessage;
    let raw = question;
    if (systemMessage) {
        raw = `[system]\n${systemMessage}\n\n[user]\n${question}`;
    }
    return [{ id: "knowledge-probe", label: `Probe: ${task.title}`, raw }];
}
|
|
78
|
+
// ---------------------------------------------------------------------------
|
|
79
|
+
// Test case assembly
|
|
80
|
+
// ---------------------------------------------------------------------------
|
|
81
|
+
/**
 * Build the single Promptfoo test case for a knowledge probe.
 *
 * The `task` variable is resolved from, in order: an explicit
 * `prompt.vars.task`, `prompt.text`, `description`, then a placeholder built
 * from the title. `prompt.text` is part of that chain so that a probe
 * authored only with `prompt.text` exposes its real question to `{{task}}`
 * templates instead of the description or placeholder.
 *
 * @param task - Knowledge probe task definition.
 * @param options - Compile options, forwarded to the assertion mapper.
 * @param warnings - Mutable warning list, forwarded to the assertion mapper.
 * @returns A one-element array with the compiled test case. `assert` is only
 *   present when at least one assertion was mapped.
 */
function buildTestCases(task, options, warnings) {
    // Map task assertions; the mapper may return a falsy value, which is skipped.
    const assertions = [];
    if (task.assertions) {
        for (const assertion of task.assertions) {
            const mapped = mapKnowledgeProbeAssertion(assertion, options, warnings);
            if (mapped) {
                assertions.push(mapped);
            }
        }
    }
    const promptVars = task.prompt?.vars ?? {};
    const question = promptVars.task ??
        task.prompt?.text ??
        task.description ??
        `Knowledge probe: ${task.title}`;
    const vars = {
        task: question,
        ...promptVars,
        // Metadata for the scoring pipeline — placed after the spread so
        // task-supplied vars cannot override it.
        __mode: "knowledge-probe",
        __probeStrategy: task.probeStrategy ?? "breadth-first",
    };
    // The spread re-applies an explicitly null/undefined `prompt.vars.task`;
    // restore the resolved question in that case.
    if (vars.task == null) {
        vars.task = question;
    }
    // Knowledge probes never carry documentation, even if a prompt var named
    // `docs` was supplied — that is the defining property of this mode.
    delete vars.docs;
    return [
        {
            description: `${task.id} — ${task.title}`,
            vars,
            ...(assertions.length > 0 ? { assert: assertions } : {}),
        },
    ];
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* KnowledgeProbeModeHandler — compilation rules for `knowledge-probe` mode.
|
|
3
|
+
*
|
|
4
|
+
* The simplest mode handler. Knowledge probes measure raw model knowledge
|
|
5
|
+
* without documentation context, tool calling, or sandboxed execution.
|
|
6
|
+
* They answer: "What does this model know about X without any help?"
|
|
7
|
+
*
|
|
8
|
+
* Key properties:
|
|
9
|
+
* - No doc vars injected (intentionally empty)
|
|
10
|
+
* - Uses the without-docs prompt template (or custom prompt)
|
|
11
|
+
* - Standard LLM providers only (no agent SDKs, no MCP)
|
|
12
|
+
* - No retrieval metrics (precision/recall/F1 not applicable)
|
|
13
|
+
* - Results feed into the standard cross-model comparison pipeline
|
|
14
|
+
*
|
|
15
|
+
* This handler is the reference implementation for the mode handler pattern.
|
|
16
|
+
*
|
|
17
|
+
* @see docs/archive/exec-plans/architecture-overhaul/phase-5-knowledge-probe.md
|
|
18
|
+
* @see packages/core/src/types/generalized-task.ts — KnowledgeProbeTaskDefinition
|
|
19
|
+
*/
|
|
20
|
+
import type { ModeHandler } from "../../../../_vendor/ailf-core/index.d.ts";
|
|
21
|
+
export { compileKnowledgeProbeTask } from "./compiler.js";
|
|
22
|
+
export { KNOWLEDGE_PROBE_PROMPT_TEMPLATES } from "./prompts.js";
|
|
23
|
+
export type { KnowledgeProbeCompileOptions, KnowledgeProbeCompileResult, KnowledgeProbeMetadata, KnowledgeProbeValidationError, } from "./types.js";
|
|
24
|
+
export { validateKnowledgeProbeTask } from "./validation.js";
|
|
25
|
+
/** ModeHandler-conformant export for the knowledge-probe evaluation mode. */
|
|
26
|
+
export declare const handler: ModeHandler;
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* KnowledgeProbeModeHandler — compilation rules for `knowledge-probe` mode.
|
|
3
|
+
*
|
|
4
|
+
* The simplest mode handler. Knowledge probes measure raw model knowledge
|
|
5
|
+
* without documentation context, tool calling, or sandboxed execution.
|
|
6
|
+
* They answer: "What does this model know about X without any help?"
|
|
7
|
+
*
|
|
8
|
+
* Key properties:
|
|
9
|
+
* - No doc vars injected (intentionally empty)
|
|
10
|
+
* - Uses the without-docs prompt template (or custom prompt)
|
|
11
|
+
* - Standard LLM providers only (no agent SDKs, no MCP)
|
|
12
|
+
* - No retrieval metrics (precision/recall/F1 not applicable)
|
|
13
|
+
* - Results feed into the standard cross-model comparison pipeline
|
|
14
|
+
*
|
|
15
|
+
* This handler is the reference implementation for the mode handler pattern.
|
|
16
|
+
*
|
|
17
|
+
* @see docs/archive/exec-plans/architecture-overhaul/phase-5-knowledge-probe.md
|
|
18
|
+
* @see packages/core/src/types/generalized-task.ts — KnowledgeProbeTaskDefinition
|
|
19
|
+
*/
|
|
20
|
+
import { compileKnowledgeProbeTask } from "./compiler.js";
|
|
21
|
+
import { KNOWLEDGE_PROBE_PROMPT_TEMPLATES } from "./prompts.js";
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Re-exports
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
export { compileKnowledgeProbeTask } from "./compiler.js";
|
|
26
|
+
export { KNOWLEDGE_PROBE_PROMPT_TEMPLATES } from "./prompts.js";
|
|
27
|
+
export { validateKnowledgeProbeTask } from "./validation.js";
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// ModeHandler adapter
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
/**
 * ModeHandler-conformant export for the knowledge-probe evaluation mode.
 *
 * `getPrompts` exposes the handler-owned prompt templates; `compileTask`
 * rejects tasks of any other mode and otherwise delegates to
 * `compileKnowledgeProbeTask`, returning the mode metadata under `extras`.
 */
export const handler = {
    getPrompts() {
        return KNOWLEDGE_PROBE_PROMPT_TEMPLATES;
    },
    compileTask(task, ctx) {
        const isKnowledgeProbe = "mode" in task && task.mode === "knowledge-probe";
        if (!isKnowledgeProbe) {
            throw new Error(`Knowledge probe handler received task with mode "${task.mode ?? "undefined"}" — expected "knowledge-probe"`);
        }
        // Only the grader provider and model list are forwarded from the context.
        const { graderProvider, models } = ctx;
        const { providers, tests, prompts, warnings, metadata } = compileKnowledgeProbeTask(task, { graderProvider, models });
        return { providers, tests, prompts, warnings, extras: { metadata } };
    },
};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical knowledge probe prompt templates.
|
|
3
|
+
*
|
|
4
|
+
* Handler-owned prompts for knowledge probe evaluations. These ask factual
|
|
5
|
+
* questions without injecting documentation context — measuring raw model
|
|
6
|
+
* knowledge about Sanity concepts.
|
|
7
|
+
*/
|
|
8
|
+
import type { PromptTemplate } from "../../../../_vendor/ailf-core/index.d.ts";
|
|
9
|
+
export declare const KNOWLEDGE_PROBE_PROMPT_TEMPLATES: Record<string, PromptTemplate>;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical knowledge probe prompt templates.
|
|
3
|
+
*
|
|
4
|
+
* Handler-owned prompts for knowledge probe evaluations. These ask factual
|
|
5
|
+
* questions without injecting documentation context — measuring raw model
|
|
6
|
+
* knowledge about Sanity concepts.
|
|
7
|
+
*/
|
|
8
|
+
// Single template id used both as the registry key and as the template's own id.
const KNOWLEDGE_PROBE_TEMPLATE_ID = "knowledge-probe";
/**
 * Prompt templates owned by the knowledge-probe handler, keyed by template id.
 * The only entry asks the question held in the `task` variable and tells the
 * model to answer from existing knowledge, without documentation.
 */
export const KNOWLEDGE_PROBE_PROMPT_TEMPLATES = {
    [KNOWLEDGE_PROBE_TEMPLATE_ID]: {
        id: KNOWLEDGE_PROBE_TEMPLATE_ID,
        label: "Knowledge Probe (No Docs)",
        template: `Answer the following question about Sanity.io based on your existing knowledge. Do not search for or reference external documentation.

## Question
{{task}}

## Instructions

1. Answer based solely on what you already know
2. Be specific — include API names, function signatures, and code examples where relevant
3. If you are unsure about a detail, say so rather than guessing
4. Provide a complete, accurate answer

Your answer:
`,
        variables: ["task"],
    },
};
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public types for the knowledge-probe mode handler.
|
|
3
|
+
*/
|
|
4
|
+
import type { PromptfooPrompt, PromptfooProvider, PromptfooTestCase } from "../../promptfoo-compiler.js";
|
|
5
|
+
/**
 * Caller-supplied settings for compiling a knowledge probe task.
 * Every field is optional.
 */
export interface KnowledgeProbeCompileOptions {
    /** Provider identifier used for LLM-graded assertions. */
    graderProvider?: string;
    /**
     * Models the probe is compiled against. Each entry supplies the provider
     * `id`, a display `label`, and an optional provider `config` bag.
     */
    models?: Array<{
        id: string;
        label: string;
        config?: Record<string, unknown>;
    }>;
}
|
|
16
|
+
/**
 * Output of compiling one knowledge probe task into Promptfoo building blocks.
 */
export interface KnowledgeProbeCompileResult {
    /** Promptfoo provider entries — one per configured model. */
    providers: PromptfooProvider[];
    /** Test cases produced for the task. */
    tests: PromptfooTestCase[];
    /** Prompts the test cases are evaluated with. */
    prompts: PromptfooPrompt[];
    /** Mode-level metadata consumed by cross-model comparison. */
    metadata: KnowledgeProbeMetadata;
    /** Human-readable warnings collected while compiling. */
    warnings: string[];
}
|
|
29
|
+
/**
 * Metadata attached to knowledge probe results so they can be compared.
 * The `mode`, `noDocContext` and `retrievalMetrics` fields are literal types:
 * they hold the same value for every knowledge probe.
 */
export interface KnowledgeProbeMetadata {
    /** Identifies the evaluation mode. */
    mode: "knowledge-probe";
    /** Name of the probe strategy that was applied. */
    probeStrategy: string;
    /** Marks that documentation context was deliberately left out. */
    noDocContext: true;
    /** Marks that retrieval metrics do not apply to this mode. */
    retrievalMetrics: false;
}
|
|
40
|
+
/**
 * One problem found while validating a knowledge probe task definition.
 */
export interface KnowledgeProbeValidationError {
    /** Name of the task field the problem relates to. */
    field: string;
    /** Human-readable description of the problem. */
    message: string;
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Validation logic for knowledge probe task definitions.
|
|
3
|
+
*/
|
|
4
|
+
import type { KnowledgeProbeTaskDefinition } from "../../../../_vendor/ailf-core/index.d.ts";
|
|
5
|
+
import type { KnowledgeProbeValidationError } from "./types.js";
|
|
6
|
+
/**
|
|
7
|
+
* Validate that a knowledge probe task definition has all required fields.
|
|
8
|
+
*/
|
|
9
|
+
export declare function validateKnowledgeProbeTask(task: KnowledgeProbeTaskDefinition): KnowledgeProbeValidationError[];
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Validation logic for knowledge probe task definitions.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Check a knowledge probe task definition for missing required fields.
 *
 * A task needs an `id`, a `title`, and a question, which may come from
 * `prompt.text`, `prompt.vars.task` or `description`.
 *
 * @param task - Task definition to inspect.
 * @returns One `{ field, message }` entry per problem, in the order id,
 *   title, prompt; empty when the task is valid.
 */
export function validateKnowledgeProbeTask(task) {
    const hasQuestion = Boolean(task.prompt?.text || task.prompt?.vars?.task || task.description);
    // Each row: [requirement satisfied?, field name, message when it is not].
    const requirements = [
        [Boolean(task.id), "id", "Task ID is required"],
        [Boolean(task.title), "title", "Task title is required"],
        [
            hasQuestion,
            "prompt",
            "Knowledge probe tasks require either prompt.text, prompt.vars.task, " +
                "or description — the question to ask the model",
        ],
    ];
    return requirements
        .filter(([satisfied]) => !satisfied)
        .map(([, field, message]) => ({ field, message }));
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Assertion resolution for literacy tasks.
|
|
3
|
+
*
|
|
4
|
+
* Handles rubric template resolution, doc-coverage auto-generation,
|
|
5
|
+
* and baseline assertion filtering.
|
|
6
|
+
*/
|
|
7
|
+
import type { LiteracyTaskDefinition } from "../../../../_vendor/ailf-core/index.d.ts";
|
|
8
|
+
import type { PromptfooAssertion } from "../../assertion-mapper.js";
|
|
9
|
+
import type { LiteracyCompileOptions } from "./types.js";
|
|
10
|
+
/**
 * Resolve a literacy task's assertions into Promptfoo assertions.
 *
 * Templated `llm-rubric` assertions are expanded from the rubric config in
 * `options`; other assertions are copied across. A doc-coverage rubric is
 * appended when the task enables `docCoverage` and a matching template exists.
 *
 * @param task - Literacy task whose assertions are resolved.
 * @param options - Compile options (rubric config, grader provider); may be undefined.
 * @param warnings - Mutable list that receives a message for each template that cannot be resolved.
 * @returns The resolved assertions.
 */
export declare function resolveAssertions(task: LiteracyTaskDefinition, options: LiteracyCompileOptions | undefined, warnings: string[]): PromptfooAssertion[];
|
|
11
|
+
/**
|
|
12
|
+
* Build baseline assertions matching the legacy expand-tasks behavior.
|
|
13
|
+
*
|
|
14
|
+
* - "full": all assertions carried over
|
|
15
|
+
* - "abbreviated": only first llm-rubric with shortened prompt
|
|
16
|
+
* - "none": no assertions
|
|
17
|
+
*/
|
|
18
|
+
export declare function buildBaselineAssertions(goldAssertions: PromptfooAssertion[], rubricMode?: "abbreviated" | "full" | "none"): PromptfooAssertion[];
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Assertion resolution for literacy tasks.
|
|
3
|
+
*
|
|
4
|
+
* Handles rubric template resolution, doc-coverage auto-generation,
|
|
5
|
+
* and baseline assertion filtering.
|
|
6
|
+
*/
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
// Assertion resolution
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
/**
 * Resolve a literacy task's assertions into Promptfoo assertions.
 *
 * - `llm-rubric` assertions that carry a `template` are expanded through the
 *   rubric config; unresolvable ones are dropped (a warning is recorded).
 * - Every other assertion is copied with its `type`, its `value` (when the
 *   key is present), a numeric `weight`, and — for `llm-rubric` — the grader
 *   provider when one is configured.
 * - When `task.docCoverage` is truthy a doc-coverage rubric is appended, if
 *   the rubric config defines one.
 *
 * @param task - Literacy task definition.
 * @param options - Optional compile options (`rubricConfig`, `graderProvider`).
 * @param warnings - Mutable warning list.
 * @returns The resolved assertions, in task order.
 */
export function resolveAssertions(task, options, warnings) {
    const grader = options?.graderProvider;
    const resolved = [];
    for (const entry of task.assertions ?? []) {
        const isRubric = entry.type === "llm-rubric";
        if (isRubric && "template" in entry) {
            const expanded = resolveTemplatedAssertion(entry, options?.rubricConfig, grader, warnings);
            if (expanded) {
                resolved.push(expanded);
            }
            continue;
        }
        // Plain assertion: carry over only the fields Promptfoo needs.
        const optionalParts = [
            "value" in entry ? { value: entry.value } : {},
            typeof entry.weight === "number" ? { weight: entry.weight } : {},
            isRubric && grader ? { provider: grader } : {},
        ];
        resolved.push(Object.assign({ type: entry.type }, ...optionalParts));
    }
    // Doc-coverage auto-generation
    if (task.docCoverage) {
        const coverage = buildDocCoverageAssertion(options?.rubricConfig, grader);
        if (coverage) {
            resolved.push(coverage);
        }
    }
    return resolved;
}
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
// Rubric template resolution
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
/**
 * Expand a templated `llm-rubric` assertion into a concrete rubric prompt.
 *
 * The rubric text is the template header, one bullet per scale entry, the
 * criteria label (default `"Check for:"`), one bullet per criterion, and a
 * fixed instruction to reply with a JSON score object.
 *
 * Problems are reported through `warnings` rather than thrown:
 * - no rubric config, or an unknown template name → returns `null`;
 * - `a.criteria` missing or not an array → treated as an empty list, so the
 *   assertion still compiles instead of failing with a TypeError.
 *
 * @param a - Templated assertion (`template` name plus `criteria` list).
 * @param rubricConfig - Rubric configuration holding `templates`; may be undefined.
 * @param graderProvider - Optional provider to attach to the assertion.
 * @param warnings - Mutable warning list.
 * @returns The resolved `llm-rubric` assertion, or `null` when it cannot be resolved.
 */
function resolveTemplatedAssertion(a, rubricConfig, graderProvider, warnings) {
    if (!rubricConfig) {
        warnings.push(`No rubric config — template "${a.template}" cannot be resolved`);
        return null;
    }
    const template = rubricConfig.templates[a.template];
    if (!template) {
        warnings.push(`Unknown rubric template: "${a.template}"`);
        return null;
    }
    let criteria = a.criteria;
    if (!Array.isArray(criteria)) {
        warnings.push(`Rubric template "${a.template}" used without a criteria list — rubric emitted with no criteria`);
        criteria = [];
    }
    const scaleText = template.scale.map((s) => `- ${s}`).join("\n");
    const criteriaText = criteria.map((c) => `- ${c}`).join("\n");
    const rubricValue = `${template.header}\n${scaleText}\n\n` +
        `${template.criteria_label ?? "Check for:"}\n${criteriaText}\n\n` +
        `Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}`;
    return {
        type: "llm-rubric",
        value: rubricValue,
        ...(graderProvider ? { provider: graderProvider } : {}),
        // Dimension metadata is only attached when the template declares a dimension.
        ...(template.dimension
            ? { metadata: { dimension: template.dimension, maxScore: 100 } }
            : {}),
    };
}
|
|
66
|
+
// ---------------------------------------------------------------------------
|
|
67
|
+
// Doc-coverage assertion
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
/**
 * Build the auto-generated doc-coverage rubric from the `"doc-coverage"`
 * template of the rubric config.
 *
 * @param rubricConfig - Rubric configuration holding `templates`; may be undefined.
 * @param graderProvider - Optional provider to attach to the assertion.
 * @returns An `llm-rubric` assertion, or `null` when there is no rubric config
 *   or it has no `"doc-coverage"` template.
 */
function buildDocCoverageAssertion(rubricConfig, graderProvider) {
    const coverageTemplate = rubricConfig?.templates["doc-coverage"];
    if (!coverageTemplate) {
        return null;
    }
    const scaleBullets = coverageTemplate.scale.map((step) => `- ${step}`).join("\n");
    const value = `${coverageTemplate.header}\n${scaleBullets}\n\n` +
        `Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}`;
    const providerPart = graderProvider ? { provider: graderProvider } : {};
    // Dimension metadata is only attached when the template declares a dimension.
    const metadataPart = coverageTemplate.dimension
        ? { metadata: { dimension: coverageTemplate.dimension, maxScore: 100 } }
        : {};
    return { type: "llm-rubric", value, ...providerPart, ...metadataPart };
}
|
|
85
|
+
// ---------------------------------------------------------------------------
|
|
86
|
+
// Baseline assertion filtering
|
|
87
|
+
// ---------------------------------------------------------------------------
|
|
88
|
+
/**
 * Derive the baseline (no-docs) assertions from the gold assertions,
 * mirroring the legacy expand-tasks behaviour.
 *
 * - `"none"`: no assertions.
 * - `"full"` (also the default when `rubricMode` is omitted): a shallow copy
 *   of every gold assertion.
 * - anything else (`"abbreviated"`): a single shortened `llm-rubric` standing
 *   in for the first gold `llm-rubric`, keeping only its provider; all other
 *   assertions are dropped. Empty when there is no `llm-rubric`.
 *
 * @param goldAssertions - Assertions resolved for the gold entry.
 * @param rubricMode - Baseline rubric mode.
 * @returns The baseline assertions.
 */
export function buildBaselineAssertions(goldAssertions, rubricMode) {
    switch (rubricMode ?? "full") {
        case "none":
            return [];
        case "full":
            return [...goldAssertions];
    }
    // Abbreviated: only the first llm-rubric contributes, as a summary prompt.
    const firstRubric = goldAssertions.find((candidate) => candidate.type === "llm-rubric");
    if (!firstRubric) {
        return [];
    }
    return [
        {
            type: "llm-rubric",
            value: "Score task completion from 0 to 100 (same criteria as above).\n" +
                'Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}',
            ...(firstRubric.provider ? { provider: firstRubric.provider } : {}),
        },
    ];
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Literacy task compilation — core compilation logic.
|
|
3
|
+
*
|
|
4
|
+
* Produces the same structure as the legacy expand-tasks.ts path:
|
|
5
|
+
* - Gold entry with with-docs prompt and canonical doc context
|
|
6
|
+
* - Baseline entry with without-docs prompt and empty docs
|
|
7
|
+
* - Rubric assertions with structured dimension metadata
|
|
8
|
+
*/
|
|
9
|
+
import type { LiteracyTaskDefinition } from "../../../../_vendor/ailf-core/index.d.ts";
|
|
10
|
+
import type { LiteracyCompileOptions, LiteracyCompileResult } from "./types.js";
|
|
11
|
+
/**
|
|
12
|
+
* Compile a literacy task into Promptfoo configuration.
|
|
13
|
+
*/
|
|
14
|
+
export declare function compileLiteracyTask(task: LiteracyTaskDefinition, options?: LiteracyCompileOptions): LiteracyCompileResult;
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Literacy task compilation — core compilation logic.
|
|
3
|
+
*
|
|
4
|
+
* Produces the same structure as the legacy expand-tasks.ts path:
|
|
5
|
+
* - Gold entry with with-docs prompt and canonical doc context
|
|
6
|
+
* - Baseline entry with without-docs prompt and empty docs
|
|
7
|
+
* - Rubric assertions with structured dimension metadata
|
|
8
|
+
*/
|
|
9
|
+
import { LiteracyVariant, } from "../../../normalize-mode.js";
|
|
10
|
+
import { buildBaselineAssertions, resolveAssertions } from "./assertions.js";
|
|
11
|
+
import { LITERACY_PROMPT_TEMPLATES } from "./prompts.js";
|
|
12
|
+
import { validateLiteracyTask } from "./validation.js";
|
|
13
|
+
/**
 * Compile a literacy task into Promptfoo providers, prompts and test cases.
 *
 * Validation problems do not abort compilation; each is reported as a warning
 * string prefixed with the task id. The evaluation mode defaults to
 * `LiteracyVariant.STANDARD` when `options.evalMode` is not set.
 *
 * @param task - Literacy task definition.
 * @param options - Optional compile options.
 * @returns `{ providers, tests, prompts, warnings }`.
 */
export function compileLiteracyTask(task, options) {
    const evalMode = options?.evalMode ?? LiteracyVariant.STANDARD;
    // Validation findings seed the warning list; test-case assembly may append more.
    const warnings = Array.from(
        validateLiteracyTask(task),
        (issue) => `Literacy task "${task.id}": ${issue.field} — ${issue.message}`,
    );
    const providers = buildProviders(options);
    const prompts = buildPrompts(evalMode);
    const tests = buildTestCases(task, evalMode, options, warnings);
    return { providers, tests, prompts, warnings };
}
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// Provider assembly
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
/**
 * Convert the configured model list into Promptfoo provider entries.
 *
 * @param options - Compile options; only `options.models` is read.
 * @returns `{ id, label, config }` for every model, or `[]` when no models
 *   are configured.
 */
function buildProviders(options) {
    const configured = options?.models;
    const hasModels = Boolean(configured) && configured.length > 0;
    if (!hasModels) {
        return [];
    }
    const providers = [];
    for (const entry of configured) {
        providers.push({ id: entry.id, label: entry.label, config: entry.config });
    }
    return providers;
}
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
42
|
+
// Prompt assembly
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
/**
 * Adapt a prompt template to the Promptfoo prompt shape: `id` and `label`
 * carry over unchanged and the template text becomes `raw`.
 *
 * @param pt - Prompt template with `id`, `label` and `template`.
 * @returns The `{ id, label, raw }` prompt entry.
 */
function templateToPromptfoo(pt) {
    const { id, label, template } = pt;
    return { id, label, raw: template };
}
|
|
47
|
+
/**
 * Select the literacy prompts for an evaluation mode.
 *
 * @param evalMode - Evaluation mode; `"agentic"` uses the single agentic
 *   prompt, every other mode uses the with-docs / without-docs pair.
 * @returns The selected prompts in Promptfoo shape.
 */
function buildPrompts(evalMode) {
    const templateIds = evalMode === "agentic"
        ? ["agentic"]
        : ["with-docs", "without-docs"];
    return templateIds.map((templateId) => templateToPromptfoo(LITERACY_PROMPT_TEMPLATES[templateId]));
}
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
// Test case assembly
|
|
58
|
+
// ---------------------------------------------------------------------------
|
|
59
|
+
/**
 * Build the Promptfoo test cases for a literacy task.
 *
 * - Gold entry: always emitted. `docs` points at the task's canonical context
 *   file when the task lists context docs, otherwise it is empty. Pinned to
 *   the `with-docs` prompt in the standard variant.
 * - Baseline entry: emitted unless the mode is `"agentic"` or the task sets
 *   `baseline.enabled` to `false`. It is pinned to the `without-docs` prompt
 *   and its `docs` variable is always empty, even if the task's prompt vars
 *   define `docs`, because the baseline measures the no-documentation floor.
 *
 * Prompt vars are spread after the built-in vars, so they may override `task`
 * and `__featureArea` in both entries and `docs` in the gold entry.
 *
 * @param task - Literacy task definition.
 * @param evalMode - Evaluation mode in effect.
 * @param options - Compile options, forwarded to assertion resolution.
 * @param warnings - Mutable warning list, forwarded to assertion resolution.
 * @returns The gold test case, followed by the baseline test case when enabled.
 */
function buildTestCases(task, evalMode, options, warnings) {
    const tests = [];
    const promptText = task.prompt?.text ?? task.prompt?.template ?? "";
    const contextDocs = task.context?.docs ?? [];
    const taskArea = task.area ?? "";
    const taskTitle = task.title;
    const promptVars = task.prompt?.vars ?? {};
    const hasDocs = contextDocs.length > 0;
    const docsVar = hasDocs ? `file://contexts/canonical/${task.id}.md` : "";
    const assertions = resolveAssertions(task, options, warnings);
    // Gold entry — canonical docs injected
    const goldVars = {
        task: promptText,
        docs: docsVar,
        __featureArea: taskArea,
        ...promptVars,
    };
    tests.push({
        description: `${taskTitle} (gold)`,
        vars: goldVars,
        ...(evalMode === LiteracyVariant.STANDARD
            ? { prompts: ["with-docs"] }
            : {}),
        ...(assertions.length > 0 ? { assert: assertions } : {}),
    });
    // Baseline entry — no docs (floor measurement)
    if (evalMode !== "agentic") {
        const baselineEnabled = task.baseline?.enabled !== false;
        if (baselineEnabled) {
            const baselineAssertions = buildBaselineAssertions(assertions, task.baseline?.rubric);
            const baselineVars = {
                task: promptText,
                docs: "",
                __featureArea: taskArea,
                ...promptVars,
            };
            // A prompt var named `docs` must not leak documentation into the
            // baseline: re-assert the empty value after the spread.
            baselineVars.docs = "";
            tests.push({
                description: `${taskTitle} (baseline)`,
                vars: baselineVars,
                prompts: ["without-docs"],
                ...(baselineAssertions.length > 0
                    ? { assert: baselineAssertions }
                    : {}),
            });
        }
    }
    return tests;
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Literacy mode handler — compiles LiteracyTaskDefinition into Promptfoo config.
|
|
3
|
+
*
|
|
4
|
+
* @see docs/archive/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
|
|
5
|
+
*/
|
|
6
|
+
import type { ModeHandler } from "../../../../_vendor/ailf-core/index.d.ts";
|
|
7
|
+
export { LITERACY_PROMPT_TEMPLATES } from "./prompts.js";
|
|
8
|
+
export { validateLiteracyTask, type LiteracyValidationError, } from "./validation.js";
|
|
9
|
+
export { compileLiteracyTask } from "./compiler.js";
|
|
10
|
+
export type { LiteracyCompileOptions, LiteracyCompileResult, RubricConfig, } from "./types.js";
|
|
11
|
+
/** ModeHandler-conformant export for the literacy evaluation mode. */
export declare const handler: ModeHandler;
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Literacy mode handler — compiles LiteracyTaskDefinition into Promptfoo config.
|
|
3
|
+
*
|
|
4
|
+
* @see docs/archive/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
|
|
5
|
+
*/
|
|
6
|
+
import { compileLiteracyTask } from "./compiler.js";
|
|
7
|
+
import { LITERACY_PROMPT_TEMPLATES } from "./prompts.js";
|
|
8
|
+
// Re-export public API
|
|
9
|
+
export { LITERACY_PROMPT_TEMPLATES } from "./prompts.js";
|
|
10
|
+
export { validateLiteracyTask, } from "./validation.js";
|
|
11
|
+
export { compileLiteracyTask } from "./compiler.js";
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// ModeHandler adapter — wraps compileLiteracyTask for registry dispatch
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
/**
 * ModeHandler-conformant export for the literacy evaluation mode.
 *
 * `getPrompts` exposes the handler-owned prompt templates; `compileTask`
 * rejects tasks whose mode is not `"literacy"` and otherwise delegates to
 * `compileLiteracyTask` with the relevant fields of the compile context.
 */
export const handler = {
    getPrompts() {
        return LITERACY_PROMPT_TEMPLATES;
    },
    compileTask(task, ctx) {
        if (task.mode !== "literacy") {
            throw new Error(`Literacy handler received task with mode "${task.mode}" — expected "literacy"`);
        }
        // Forward only the context fields the literacy compiler understands.
        const { graderProvider, rootDir, models, rubricConfig, evalMode } = ctx;
        const { providers, tests, prompts, warnings } = compileLiteracyTask(task, {
            graderProvider,
            rootDir,
            models,
            rubricConfig,
            evalMode,
        });
        return { providers, tests, prompts, warnings };
    },
};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical prompt templates for literacy-mode evaluations.
|
|
3
|
+
*
|
|
4
|
+
* These are the source-of-truth templates. Previously lived in
|
|
5
|
+
* config/prompts.ts as global templates; now handler-owned so
|
|
6
|
+
* non-literacy modes can define their own prompts without collision.
|
|
7
|
+
*/
|
|
8
|
+
import type { PromptTemplate } from "../../../../_vendor/ailf-core/index.d.ts";
|
|
9
|
+
export declare const LITERACY_PROMPT_TEMPLATES: Record<string, PromptTemplate>;
|