@sanity/ailf 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +28 -23
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
- package/dist/_vendor/ailf-core/config-helpers.js +29 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
- package/dist/_vendor/ailf-core/examples/index.js +208 -114
- package/dist/_vendor/ailf-core/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/index.js +1 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
- package/dist/_vendor/ailf-tasks/cli.js +61 -0
- package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
- package/dist/_vendor/ailf-tasks/index.js +16 -0
- package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
- package/dist/_vendor/ailf-tasks/parser.js +73 -0
- package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
- package/dist/_vendor/ailf-tasks/schemas.js +180 -0
- package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
- package/dist/_vendor/ailf-tasks/validation.js +162 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +6 -1
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
- package/dist/adapters/task-sources/index.d.ts +1 -2
- package/dist/adapters/task-sources/index.js +1 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
- package/dist/adapters/task-sources/repo-schemas.js +2 -2
- package/dist/adapters/task-sources/repo-task-source.js +1 -1
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
- package/dist/adapters/task-sources/task-file-loader.js +20 -6
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +2 -3
- package/dist/commands/init.js +56 -170
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/composition-root.d.ts +2 -3
- package/dist/composition-root.js +27 -14
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +30 -16
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +50 -15
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +52 -32
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/checks.d.ts +8 -3
- package/dist/pipeline/checks.js +23 -3
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
- package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
- package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +15 -8
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +15 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/dist/pipeline/mirror-repo-tasks.js +1 -1
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +67 -29
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +25 -25
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
* ]
|
|
18
18
|
* ```
|
|
19
19
|
*
|
|
20
|
-
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
20
|
+
* @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
21
21
|
*/
|
|
22
22
|
// ---------------------------------------------------------------------------
|
|
23
23
|
// Public API
|
|
@@ -1,13 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Config compiler — the heart of the
|
|
2
|
+
* Config compiler — the heart of the compilation architecture.
|
|
3
3
|
*
|
|
4
4
|
* Converts task definitions from any source into a TaskGraph IR,
|
|
5
5
|
* then compiles the graph into Promptfoo YAML configuration.
|
|
6
6
|
*
|
|
7
|
-
*
|
|
8
|
-
* Phase 7 will migrate callers to use the compiler exclusively.
|
|
9
|
-
*
|
|
10
|
-
* @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
|
|
7
|
+
* @see docs/archive/exec-plans/architecture-overhaul/phase-2-config-compiler.md
|
|
11
8
|
*/
|
|
12
9
|
export { buildTaskGraph, detectCycle, type TaskGraphBuildOptions, type TaskGraphBuildResult, } from "./task-graph-builder.js";
|
|
13
10
|
export { compileToPromptfoo, type CompilationResult, type CompiledPromptfooConfig, type PromptfooCompilerOptions, type PromptfooPrompt, type PromptfooProvider, type PromptfooTestCase, } from "./promptfoo-compiler.js";
|
|
@@ -1,13 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Config compiler — the heart of the
|
|
2
|
+
* Config compiler — the heart of the compilation architecture.
|
|
3
3
|
*
|
|
4
4
|
* Converts task definitions from any source into a TaskGraph IR,
|
|
5
5
|
* then compiles the graph into Promptfoo YAML configuration.
|
|
6
6
|
*
|
|
7
|
-
*
|
|
8
|
-
* Phase 7 will migrate callers to use the compiler exclusively.
|
|
9
|
-
*
|
|
10
|
-
* @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
|
|
7
|
+
* @see docs/archive/exec-plans/architecture-overhaul/phase-2-config-compiler.md
|
|
11
8
|
*/
|
|
12
9
|
// TaskGraph builder
|
|
13
10
|
export { buildTaskGraph, detectCycle, } from "./task-graph-builder.js";
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
* - Prompts from config/prompts are integrated
|
|
17
17
|
* - TaskGraphBuilder validates the DAG, deduplicates, and orders tasks
|
|
18
18
|
*
|
|
19
|
-
* @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
|
|
19
|
+
* @see docs/archive/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
|
|
20
20
|
*/
|
|
21
21
|
import type { LiteracyTaskDefinition } from "../../_vendor/ailf-core/index.d.ts";
|
|
22
22
|
import { type LiteracyCompileResult } from "./mode-handlers/literacy/index.js";
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
* - Prompts from config/prompts are integrated
|
|
17
17
|
* - TaskGraphBuilder validates the DAG, deduplicates, and orders tasks
|
|
18
18
|
*
|
|
19
|
-
* @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
|
|
19
|
+
* @see docs/archive/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
|
|
20
20
|
*/
|
|
21
21
|
import { compileLiteracyTask, } from "./mode-handlers/literacy/index.js";
|
|
22
22
|
import { tryLoadConfigFile } from "./config-loader.js";
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Tests whether an autonomous agent can complete implementation tasks
|
|
5
5
|
* end-to-end, including tool use, file creation, and code generation.
|
|
6
6
|
*
|
|
7
|
-
* @see docs/
|
|
7
|
+
* @see docs/modes.md
|
|
8
8
|
*/
|
|
9
9
|
import type { ModeBase } from "../../../_vendor/ailf-core/index.d.ts";
|
|
10
10
|
export declare function createAgentHarnessBase(): ModeBase;
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Tests what the model knows about a topic without providing documentation,
|
|
5
5
|
* establishing a baseline of model knowledge.
|
|
6
6
|
*
|
|
7
|
-
* @see docs/
|
|
7
|
+
* @see docs/modes.md
|
|
8
8
|
*/
|
|
9
9
|
import type { ModeBase } from "../../../_vendor/ailf-core/index.d.ts";
|
|
10
10
|
export declare function createKnowledgeProbeBase(): ModeBase;
|
|
@@ -6,7 +6,18 @@
|
|
|
6
6
|
* like `sanity-literacy` target this mode base and add their own sources,
|
|
7
7
|
* features, and doc fetcher.
|
|
8
8
|
*
|
|
9
|
-
* @see docs/
|
|
9
|
+
* @see docs/modes.md
|
|
10
10
|
*/
|
|
11
|
-
import type
|
|
11
|
+
import { type ModeBase, type ModelEntry } from "../../../_vendor/ailf-core/index.d.ts";
|
|
12
12
|
export declare function createLiteracyModeBase(): ModeBase;
|
|
13
|
+
/**
|
|
14
|
+
* Check whether a model participates in a specific literacy variant.
|
|
15
|
+
*
|
|
16
|
+
* A model matches if:
|
|
17
|
+
* 1. It's enrolled in the `literacy` eval mode (or has no `modes` field)
|
|
18
|
+
* 2. The variant is in its resolved variant set (defaults to all variants)
|
|
19
|
+
*
|
|
20
|
+
* This is the single source of truth for literacy variant matching —
|
|
21
|
+
* import this instead of reimplementing the pattern.
|
|
22
|
+
*/
|
|
23
|
+
export declare function modelMatchesLiteracyVariant(model: ModelEntry, variant: string): boolean;
|
|
@@ -6,9 +6,13 @@
|
|
|
6
6
|
* like `sanity-literacy` target this mode base and add their own sources,
|
|
7
7
|
* features, and doc fetcher.
|
|
8
8
|
*
|
|
9
|
-
* @see docs/
|
|
9
|
+
* @see docs/modes.md
|
|
10
10
|
*/
|
|
11
|
+
import { modelMatchesMode, resolveModelVariants, } from "../../../_vendor/ailf-core/index.js";
|
|
11
12
|
import { LITERACY_PROMPT_TEMPLATES } from "../mode-handlers/literacy/index.js";
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Mode base factory
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
12
16
|
export function createLiteracyModeBase() {
|
|
13
17
|
return {
|
|
14
18
|
mode: {
|
|
@@ -21,6 +25,28 @@ export function createLiteracyModeBase() {
|
|
|
21
25
|
"doc-coverage",
|
|
22
26
|
],
|
|
23
27
|
handlerModule: "./mode-handlers/literacy/index.js",
|
|
28
|
+
variants: [
|
|
29
|
+
{
|
|
30
|
+
id: "baseline",
|
|
31
|
+
label: "Standard (baseline)",
|
|
32
|
+
description: "Standard with-docs and without-docs evaluation prompts",
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
id: "observed",
|
|
36
|
+
label: "Observed (HTTP-instrumented)",
|
|
37
|
+
description: "HTTP-instrumented evaluation that records model behavior",
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
id: "agentic-naive",
|
|
41
|
+
label: "Agentic (naive)",
|
|
42
|
+
description: "Model uses tools to find docs with default system prompt",
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
id: "agentic-optimized",
|
|
46
|
+
label: "Agentic (optimized)",
|
|
47
|
+
description: "Model uses tools to find docs with optimized system prompt",
|
|
48
|
+
},
|
|
49
|
+
],
|
|
24
50
|
},
|
|
25
51
|
rubricTemplates: [
|
|
26
52
|
{
|
|
@@ -76,3 +102,31 @@ export function createLiteracyModeBase() {
|
|
|
76
102
|
promptTemplates: LITERACY_PROMPT_TEMPLATES,
|
|
77
103
|
};
|
|
78
104
|
}
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
// Shared variant matching helper
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
/**
 * Memoized literacy mode base.
 *
 * Stays unset until the first lookup, then holds the value produced by
 * `createLiteracyModeBase()` so later lookups reuse it instead of rebuilding.
 */
let _cachedBase;
function getLiteracyModeBase() {
    // Fast path: a previous call already built the base.
    if (_cachedBase) {
        return _cachedBase;
    }
    _cachedBase = createLiteracyModeBase();
    return _cachedBase;
}
|
|
115
|
+
/**
 * Decide whether `model` takes part in the given literacy `variant`.
 *
 * Both conditions must hold:
 *   1. `modelMatchesMode(model, "literacy")` accepts the model, i.e. it is
 *      enrolled in the `literacy` eval mode (or has no `modes` field).
 *   2. `variant` is listed in the model's resolved variant set. When
 *      `resolveModelVariants` yields nothing, every variant is accepted.
 *
 * This is the single source of truth for literacy variant matching —
 * import this instead of reimplementing the pattern.
 */
export function modelMatchesLiteracyVariant(model, variant) {
    if (modelMatchesMode(model, "literacy")) {
        const resolved = resolveModelVariants(model, getLiteracyModeBase());
        // No resolved set means the model is not restricted to specific variants.
        return resolved ? resolved.includes(variant) : true;
    }
    return false;
}
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Defines rubric templates and scoring for evaluating how well an LLM can
|
|
5
5
|
* discover and use MCP server tools correctly.
|
|
6
6
|
*
|
|
7
|
-
* @see docs/
|
|
7
|
+
* @see docs/modes.md
|
|
8
8
|
*/
|
|
9
9
|
import type { ModeBase } from "../../../_vendor/ailf-core/index.d.ts";
|
|
10
10
|
export declare function createMcpServerModeBase(): ModeBase;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Agent harness mode handler — compiles AgentHarnessTaskDefinition into Promptfoo config.
|
|
3
3
|
*
|
|
4
|
-
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
4
|
+
* @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
5
5
|
* @see packages/core/src/types/generalized-task.ts — AgentHarnessTaskDefinition
|
|
6
6
|
*/
|
|
7
7
|
import type { ModeHandler } from "../../../../_vendor/ailf-core/index.d.ts";
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Agent harness mode handler — compiles AgentHarnessTaskDefinition into Promptfoo config.
|
|
3
3
|
*
|
|
4
|
-
* @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
4
|
+
* @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
5
5
|
* @see packages/core/src/types/generalized-task.ts — AgentHarnessTaskDefinition
|
|
6
6
|
*/
|
|
7
7
|
import { compileAgentHarnessTask } from "./compiler.js";
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AgentHarnessModeHandler — compilation rules for `agent-harness` mode.
|
|
3
|
+
*
|
|
4
|
+
* Maps agent harness task definitions to Promptfoo configuration with:
|
|
5
|
+
* - Claude Agent SDK / OpenAI Codex SDK providers
|
|
6
|
+
* - Tool permission configuration (preset/allowed/disallowed)
|
|
7
|
+
* - Sandbox setup/teardown via Promptfoo extensions
|
|
8
|
+
* - Fixture provisioning into sandbox working directory
|
|
9
|
+
*
|
|
10
|
+
* @see docs/archive/exec-plans/architecture-overhaul/phase-4-agent-harness.md
|
|
11
|
+
* @see packages/core/src/types/generalized-task.ts — AgentHarnessTaskDefinition
|
|
12
|
+
*/
|
|
13
|
+
import type { AgentHarnessTaskDefinition, ModeHandler, PromptTemplate } from "../../../_vendor/ailf-core/index.d.ts";
|
|
14
|
+
import type { PromptfooPrompt, PromptfooProvider, PromptfooTestCase } from "../promptfoo-compiler.js";
|
|
15
|
+
import type { SandboxType } from "../sandbox/sandbox-strategy.js";
|
|
16
|
+
export declare const AGENT_HARNESS_PROMPT_TEMPLATES: Record<string, PromptTemplate>;
|
|
17
|
+
/**
 * Optional settings accepted when compiling an agent harness task.
 */
export interface AgentHarnessCompileOptions {
    /**
     * Provider used to grade LLM-graded assertions.
     */
    graderProvider?: string;
    /**
     * Root directory against which fixtures are resolved.
     */
    rootDir?: string;
}
|
|
24
|
+
/**
 * Everything produced by compiling one agent harness task.
 */
export interface AgentHarnessCompileResult {
    /**
     * Promptfoo provider configuration entries.
     */
    providers: PromptfooProvider[];
    /**
     * Test cases emitted by the compilation.
     */
    tests: PromptfooTestCase[];
    /**
     * Prompts to be evaluated.
     */
    prompts: PromptfooPrompt[];
    /**
     * Promptfoo extensions that drive the sandbox lifecycle.
     */
    extensions: PromptfooExtension[];
    /**
     * Metadata describing the sandbox configuration.
     */
    sandboxConfig: SandboxConfigMeta;
    /**
     * Warnings raised while compiling.
     */
    warnings: string[];
}
|
|
39
|
+
/**
 * A single Promptfoo extension hook.
 */
export interface PromptfooExtension {
    /**
     * Which hook this extension is registered for.
     */
    type: "beforeEach" | "afterEach";
    /**
     * The hook itself: either JavaScript code or a module path.
     */
    code: string;
}
|
|
45
|
+
/**
 * Sandbox configuration metadata recorded for a task.
 */
export interface SandboxConfigMeta {
    /**
     * Sandbox kind, as declared by `SandboxType`.
     */
    type: SandboxType;
    /**
     * Optional image identifier.
     * NOTE(review): presumably a container image for the sandbox — confirm.
     */
    image?: string;
    /**
     * Fixture entries associated with the sandbox.
     * NOTE(review): presumably paths or names of fixtures — confirm.
     */
    fixtures: string[];
    /**
     * Optional resource limits; each individual limit is optional as well.
     */
    limits?: {
        cpus?: number;
        memoryBytes?: number;
        networkAccess?: boolean;
    };
}
|
|
56
|
+
/**
 * One validation problem found in an agent harness task definition.
 */
export interface AgentHarnessValidationError {
    /**
     * The field the problem relates to.
     */
    field: string;
    /**
     * Description of the problem.
     */
    message: string;
}
|
|
61
|
+
/**
|
|
62
|
+
* Validate that an agent harness task definition has all required fields.
|
|
63
|
+
*/
|
|
64
|
+
export declare function validateAgentHarnessTask(task: AgentHarnessTaskDefinition): AgentHarnessValidationError[];
|
|
65
|
+
/**
|
|
66
|
+
* Compile an agent harness task definition into Promptfoo configuration.
|
|
67
|
+
*/
|
|
68
|
+
export declare function compileAgentHarnessTask(task: AgentHarnessTaskDefinition, options?: AgentHarnessCompileOptions): AgentHarnessCompileResult;
|
|
69
|
+
/** ModeHandler-conformant export for the agent-harness evaluation mode. */
|
|
70
|
+
export declare const handler: ModeHandler;
|