npm - @sanity/ailf - Versions diffs - 2.0.0 → 2.0.2 - Mend

@sanity/ailf 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (444) hide show

package/canonical/grader-references/README.md +2 -2
package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
package/config/features.ts +1 -1
package/config/models.ts +28 -23
package/config/sources.ts +1 -1
package/config/thresholds.ts +1 -1
package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
package/dist/_vendor/ailf-core/config-helpers.js +29 -0
package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
package/dist/_vendor/ailf-core/examples/index.js +208 -114
package/dist/_vendor/ailf-core/index.d.ts +1 -0
package/dist/_vendor/ailf-core/index.js +1 -0
package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
package/dist/_vendor/ailf-core/services/index.js +1 -1
package/dist/_vendor/ailf-core/services/scoring.js +9 -0
package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
package/dist/_vendor/ailf-tasks/cli.js +61 -0
package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
package/dist/_vendor/ailf-tasks/index.js +16 -0
package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
package/dist/_vendor/ailf-tasks/parser.js +73 -0
package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
package/dist/_vendor/ailf-tasks/schemas.js +180 -0
package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
package/dist/_vendor/ailf-tasks/validation.js +162 -0
package/dist/adapters/api-client/remediation.js +2 -2
package/dist/adapters/config-sources/file-config-adapter.js +6 -1
package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
package/dist/adapters/index.d.ts +0 -1
package/dist/adapters/index.js +0 -1
package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
package/dist/adapters/task-sources/composite-task-source.js +1 -1
package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
package/dist/adapters/task-sources/index.d.ts +1 -2
package/dist/adapters/task-sources/index.js +1 -2
package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
package/dist/adapters/task-sources/repo-schemas.js +2 -2
package/dist/adapters/task-sources/repo-task-source.js +1 -1
package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
package/dist/adapters/task-sources/repo-trigger.js +1 -1
package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
package/dist/adapters/task-sources/task-file-loader.js +20 -6
package/dist/agent-observer/test-imports.d.ts +7 -0
package/dist/agent-observer/test-imports.js +185 -0
package/dist/artifact-capture/comparator.d.ts +22 -0
package/dist/artifact-capture/comparator.js +493 -0
package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
package/dist/artifact-capture/filesystem-collector.js +237 -0
package/dist/artifact-capture/redact-artifact.d.ts +20 -0
package/dist/artifact-capture/redact-artifact.js +115 -0
package/dist/assertions/source-isolation.d.ts +1 -1
package/dist/assertions/source-isolation.js +1 -1
package/dist/cli.js +4 -0
package/dist/commands/calculate-scores.js +1 -0
package/dist/commands/capture-compare.d.ts +15 -0
package/dist/commands/capture-compare.js +253 -0
package/dist/commands/capture-list.d.ts +12 -0
package/dist/commands/capture-list.js +147 -0
package/dist/commands/capture.d.ts +9 -0
package/dist/commands/capture.js +16 -0
package/dist/commands/chronic-failures.d.ts +8 -0
package/dist/commands/chronic-failures.js +33 -0
package/dist/commands/explain-handler.d.ts +1 -1
package/dist/commands/explain-handler.js +37 -8
package/dist/commands/fetch-docs.js +1 -0
package/dist/commands/generate-configs.d.ts +3 -3
package/dist/commands/generate-configs.js +20 -8
package/dist/commands/init.d.ts +2 -3
package/dist/commands/init.js +56 -170
package/dist/commands/pipeline-action.d.ts +7 -1
package/dist/commands/pipeline-action.js +43 -19
package/dist/commands/pipeline.d.ts +6 -1
package/dist/commands/pipeline.js +7 -2
package/dist/commands/pr-comment.js +1 -0
package/dist/commands/publish.js +1 -0
package/dist/commands/shared/help.js +2 -2
package/dist/commands/update-quality-scores.d.ts +5 -0
package/dist/commands/update-quality-scores.js +20 -0
package/dist/composition-root.d.ts +2 -3
package/dist/composition-root.js +27 -14
package/dist/config/features.ts +23 -0
package/dist/config/models.ts +100 -0
package/dist/config/prompts.ts +16 -0
package/dist/config/rubrics.ts +225 -0
package/dist/config/schedules.ts +47 -0
package/dist/config/sinks.ts +37 -0
package/dist/config/sources.ts +21 -0
package/dist/config/thresholds.ts +61 -0
package/dist/lib/agent-behavior-report.d.ts +8 -0
package/dist/lib/agent-behavior-report.js +185 -0
package/dist/lib/baseline.d.ts +19 -0
package/dist/lib/baseline.js +153 -0
package/dist/lib/calculate-scores.d.ts +23 -0
package/dist/lib/calculate-scores.js +42 -0
package/dist/lib/compare.d.ts +18 -0
package/dist/lib/compare.js +170 -0
package/dist/lib/coverage-audit.d.ts +4 -0
package/dist/lib/coverage-audit.js +42 -0
package/dist/lib/discovery-report.d.ts +13 -0
package/dist/lib/discovery-report.js +57 -0
package/dist/lib/fetch-docs.d.ts +30 -0
package/dist/lib/fetch-docs.js +171 -0
package/dist/lib/generate-configs.d.ts +25 -0
package/dist/lib/generate-configs.js +42 -0
package/dist/lib/grader-api.d.ts +21 -0
package/dist/lib/grader-api.js +34 -0
package/dist/lib/grader-compare.d.ts +19 -0
package/dist/lib/grader-compare.js +91 -0
package/dist/lib/grader-consistency.d.ts +27 -0
package/dist/lib/grader-consistency.js +79 -0
package/dist/lib/grader-sensitivity.d.ts +19 -0
package/dist/lib/grader-sensitivity.js +75 -0
package/dist/lib/grader-validate.d.ts +19 -0
package/dist/lib/grader-validate.js +78 -0
package/dist/lib/measure-retrieval.d.ts +14 -0
package/dist/lib/measure-retrieval.js +71 -0
package/dist/lib/pr-comment.d.ts +16 -0
package/dist/lib/pr-comment.js +28 -0
package/dist/lib/readiness-report.d.ts +13 -0
package/dist/lib/readiness-report.js +108 -0
package/dist/lib/webhook-server.d.ts +11 -0
package/dist/lib/webhook-server.js +24 -0
package/dist/lib/weekly-digest.d.ts +24 -0
package/dist/lib/weekly-digest.js +148 -0
package/dist/orchestration/build-app-context.js +13 -0
package/dist/orchestration/cache-context.d.ts +23 -0
package/dist/orchestration/cache-context.js +43 -0
package/dist/orchestration/env-bridge.d.ts +21 -0
package/dist/orchestration/env-bridge.js +66 -0
package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
package/dist/orchestration/load-pipeline-tasks.js +52 -0
package/dist/orchestration/pipeline-orchestrator.js +75 -5
package/dist/orchestration/step-runner.js +5 -1
package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
package/dist/orchestration/steps/calculate-scores-step.js +13 -0
package/dist/orchestration/steps/callback-step.js +10 -1
package/dist/orchestration/steps/compare-step.js +6 -3
package/dist/orchestration/steps/discovery-report-step.js +6 -2
package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
package/dist/orchestration/steps/fetch-docs-step.js +30 -16
package/dist/orchestration/steps/gap-analysis-step.js +13 -2
package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
package/dist/orchestration/steps/generate-configs-step.js +50 -15
package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
package/dist/orchestration/steps/publish-report-step.js +19 -0
package/dist/orchestration/steps/readiness-step.js +8 -3
package/dist/orchestration/steps/report-step.js +17 -4
package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
package/dist/orchestration/steps/run-eval-step.js +52 -32
package/dist/pipeline/agent-behavior-report.js +6 -0
package/dist/pipeline/attribution.d.ts +1 -1
package/dist/pipeline/attribution.js +1 -1
package/dist/pipeline/cache.js +29 -15
package/dist/pipeline/calculate-scores.d.ts +2 -0
package/dist/pipeline/calculate-scores.js +70 -33
package/dist/pipeline/checks.d.ts +8 -3
package/dist/pipeline/checks.js +23 -3
package/dist/pipeline/chronic-failures.d.ts +55 -0
package/dist/pipeline/chronic-failures.js +110 -0
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
package/dist/pipeline/compiler/assertion-mapper.js +1 -1
package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
package/dist/pipeline/compiler/config-loader.d.ts +14 -0
package/dist/pipeline/compiler/config-loader.js +42 -2
package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
package/dist/pipeline/compiler/fixture-resolver.js +1 -1
package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
package/dist/pipeline/compiler/ignore-fields.js +1 -1
package/dist/pipeline/compiler/index.d.ts +2 -5
package/dist/pipeline/compiler/index.js +2 -5
package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
package/dist/pipeline/compiler/literacy-bridge.js +1 -1
package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
package/dist/pipeline/compiler/provider-assembler.js +13 -7
package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/index.js +1 -1
package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
package/dist/pipeline/compiler/scoring-bridge.js +1 -1
package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
package/dist/pipeline/compiler/task-bridge.js +92 -0
package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
package/dist/pipeline/compiler/task-graph-builder.js +1 -4
package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
package/dist/pipeline/compiler/telemetry/index.js +1 -1
package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
package/dist/pipeline/compiler/variable-resolver.js +1 -1
package/dist/pipeline/coverage-audit.d.ts +1 -1
package/dist/pipeline/coverage-audit.js +1 -1
package/dist/pipeline/degradations.d.ts +1 -1
package/dist/pipeline/degradations.js +1 -1
package/dist/pipeline/failure-modes.d.ts +1 -1
package/dist/pipeline/failure-modes.js +13 -1
package/dist/pipeline/gap-analysis.d.ts +1 -1
package/dist/pipeline/gap-analysis.js +3 -1
package/dist/pipeline/generate-configs.d.ts +2 -2
package/dist/pipeline/generate-configs.js +15 -8
package/dist/pipeline/grader-compare-runner.d.ts +1 -1
package/dist/pipeline/grader-compare-runner.js +7 -1
package/dist/pipeline/grader-comparison.d.ts +1 -1
package/dist/pipeline/grader-comparison.js +1 -1
package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
package/dist/pipeline/grader-consistency-runner.js +7 -1
package/dist/pipeline/grader-consistency.d.ts +1 -1
package/dist/pipeline/grader-consistency.js +1 -1
package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
package/dist/pipeline/grader-sensitivity-runner.js +1 -1
package/dist/pipeline/grader-sensitivity.d.ts +1 -1
package/dist/pipeline/grader-sensitivity.js +1 -1
package/dist/pipeline/grader-validate-runner.d.ts +1 -1
package/dist/pipeline/grader-validate-runner.js +2 -2
package/dist/pipeline/grader-validation.d.ts +1 -1
package/dist/pipeline/grader-validation.js +1 -1
package/dist/pipeline/map-request-to-config.js +15 -2
package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
package/dist/pipeline/mirror-repo-tasks.js +1 -1
package/dist/pipeline/plan-format.d.ts +1 -1
package/dist/pipeline/plan-format.js +1 -1
package/dist/pipeline/plan.d.ts +1 -1
package/dist/pipeline/plan.js +67 -29
package/dist/pipeline/probe.d.ts +1 -1
package/dist/pipeline/probe.js +1 -1
package/dist/pipeline/readiness-report.d.ts +2 -2
package/dist/pipeline/readiness-report.js +2 -2
package/dist/pipeline/release-classification.d.ts +1 -1
package/dist/pipeline/release-classification.js +1 -1
package/dist/pipeline/release-report.d.ts +1 -1
package/dist/pipeline/release-report.js +1 -1
package/dist/pipeline/repo-eval-comment.d.ts +1 -1
package/dist/pipeline/repo-eval-comment.js +1 -1
package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
package/dist/pipeline/repo-threshold-evaluator.js +1 -1
package/dist/pipeline/resolve-mappings.d.ts +6 -6
package/dist/pipeline/resolve-mappings.js +44 -44
package/dist/pipeline/retrieval-metrics.d.ts +3 -3
package/dist/pipeline/retrieval-metrics.js +28 -20
package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
package/dist/pipeline/steps/calculate-scores-step.js +89 -0
package/dist/pipeline/steps/compare-step.d.ts +18 -0
package/dist/pipeline/steps/compare-step.js +90 -0
package/dist/pipeline/steps/eval-step.d.ts +53 -0
package/dist/pipeline/steps/eval-step.js +347 -0
package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
package/dist/pipeline/steps/fetch-docs-step.js +84 -0
package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
package/dist/pipeline/steps/generate-configs-step.js +98 -0
package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
package/dist/pipeline/steps/grader-consistency-step.js +74 -0
package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
package/dist/pipeline/steps/publish-report-step.js +243 -0
package/dist/pipeline/steps/report-step.d.ts +13 -0
package/dist/pipeline/steps/report-step.js +56 -0
package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
package/dist/pipeline/steps/update-scores-step.js +42 -0
package/dist/pipeline/targeted-loo.d.ts +1 -1
package/dist/pipeline/targeted-loo.js +1 -1
package/dist/pipeline/thresholds.d.ts +1 -1
package/dist/pipeline/thresholds.js +1 -1
package/dist/pipeline/validate.js +13 -0
package/dist/report-store.d.ts +17 -0
package/dist/report-store.js +24 -0
package/dist/scripts/agent-behavior-report.d.ts +19 -0
package/dist/scripts/agent-behavior-report.js +315 -0
package/dist/scripts/baseline.d.ts +43 -0
package/dist/scripts/baseline.js +267 -0
package/dist/scripts/calculate-scores.d.ts +166 -0
package/dist/scripts/calculate-scores.js +1296 -0
package/dist/scripts/compare.d.ts +22 -0
package/dist/scripts/compare.js +334 -0
package/dist/scripts/coverage-audit.d.ts +44 -0
package/dist/scripts/coverage-audit.js +209 -0
package/dist/scripts/debug-eval.d.ts +19 -0
package/dist/scripts/debug-eval.js +73 -0
package/dist/scripts/discovery-report.d.ts +58 -0
package/dist/scripts/discovery-report.js +250 -0
package/dist/scripts/fetch-docs.d.ts +35 -0
package/dist/scripts/fetch-docs.js +472 -0
package/dist/scripts/generate-configs.d.ts +66 -0
package/dist/scripts/generate-configs.js +459 -0
package/dist/scripts/grader-api.d.ts +27 -0
package/dist/scripts/grader-api.js +206 -0
package/dist/scripts/grader-compare.d.ts +22 -0
package/dist/scripts/grader-compare.js +368 -0
package/dist/scripts/grader-consistency.d.ts +20 -0
package/dist/scripts/grader-consistency.js +313 -0
package/dist/scripts/grader-sensitivity.d.ts +22 -0
package/dist/scripts/grader-sensitivity.js +354 -0
package/dist/scripts/grader-validate.d.ts +19 -0
package/dist/scripts/grader-validate.js +267 -0
package/dist/scripts/measure-retrieval.d.ts +10 -0
package/dist/scripts/measure-retrieval.js +145 -0
package/dist/scripts/migrate-task-mode.d.ts +1 -1
package/dist/scripts/migrate-task-mode.js +1 -1
package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
package/dist/scripts/pipeline.d.ts +76 -0
package/dist/scripts/pipeline.js +1031 -0
package/dist/scripts/pr-comment.d.ts +10 -0
package/dist/scripts/pr-comment.js +510 -0
package/dist/scripts/readiness-report.d.ts +88 -0
package/dist/scripts/readiness-report.js +342 -0
package/dist/scripts/update-quality-scores.d.ts +15 -0
package/dist/scripts/update-quality-scores.js +184 -0
package/dist/scripts/validate-task-sources.d.ts +1 -1
package/dist/scripts/validate-task-sources.js +1 -1
package/dist/scripts/validate.d.ts +13 -0
package/dist/scripts/validate.js +79 -0
package/dist/scripts/webhook-server.d.ts +26 -0
package/dist/scripts/webhook-server.js +147 -0
package/dist/scripts/weekly-digest.d.ts +24 -0
package/dist/scripts/weekly-digest.js +144 -0
package/dist/sinks/format-slack.d.ts +64 -0
package/dist/sinks/format-slack.js +306 -0
package/dist/sinks/slack-sink.d.ts +27 -0
package/dist/sinks/slack-sink.js +78 -0
package/dist/sinks/types.d.ts +1 -1
package/dist/sinks/types.js +1 -1
package/dist/sinks/webhook-sink.d.ts +19 -0
package/dist/sinks/webhook-sink.js +50 -0
package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
package/dist/tasks/literacy/content-lake.task.ts +181 -0
package/dist/tasks/literacy/frameworks.task.ts +129 -0
package/dist/tasks/literacy/functions.task.ts +70 -0
package/dist/tasks/literacy/groq.task.ts +259 -0
package/dist/tasks/literacy/image-handling.task.ts +95 -0
package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
package/dist/tasks/literacy/portable-text.task.ts +169 -0
package/dist/tasks/literacy/studio-setup.task.ts +134 -0
package/dist/tasks/literacy/visual-editing.task.ts +147 -0
package/package.json +25 -25
package/tasks/.expanded.agentic.yaml +280 -0
package/tasks/.expanded.yaml +565 -0
package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
package/tasks/literacy/content-lake.task.ts +181 -0
package/tasks/literacy/frameworks.task.ts +1 -0
package/tasks/literacy/functions.task.ts +1 -0
package/tasks/literacy/groq.task.ts +1 -0
package/tasks/literacy/image-handling.task.ts +95 -0
package/tasks/literacy/nextjs-live.task.ts +2 -1
package/tasks/literacy/portable-text.task.ts +169 -0
package/tasks/literacy/studio-setup.task.ts +5 -2
package/tasks/literacy/visual-editing.task.ts +1 -0
package/LICENSE +0 -21
package/tasks/frameworks.yaml +0 -98
package/tasks/functions.yaml +0 -51
package/tasks/groq.yaml +0 -216
package/tasks/nextjs-live.yaml +0 -62
package/tasks/studio-setup.yaml +0 -111
package/tasks/visual-editing.yaml +0 -120

package/dist/_vendor/ailf-core/index.js CHANGED Viewed

@@ -20,3 +20,4 @@ export * from "./examples/index.js";
 // ---------------------------------------------------------------------------
 export { defineConfig, defineFeatures, defineModeBase, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./config-helpers.js";
 export { env } from "./env-helper.js";
+export { NoOpArtifactCollector } from "./artifact-capture/noop-collector.js";

package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts ADDED Viewed

@@ -0,0 +1,94 @@
+/**
+ * Port: ArtifactCollector — captures pipeline artifacts during execution.
+ *
+ * Injected into AppContext. When capture is disabled (default), the
+ * composition root provides NoOpArtifactCollector. When --capture is
+ * set, provides FilesystemArtifactCollector.
+ *
+ * Design principles:
+ * - P1: Zero-cost when off (no-op stub)
+ * - P2: Capture, don't intercept (steps call capture() explicitly)
+ * - P5: Non-blocking (failures swallowed, never block the pipeline)
+ */
+/**
+ * The contract for artifact capture during pipeline execution.
+ *
+ * Steps call capture() for in-memory data and captureFile() for
+ * artifacts already on disk. The orchestrator calls flush() once
+ * at pipeline end to write everything to the configured destination.
+ */
+export interface ArtifactCollector {
+    /**
+     * Record an in-memory artifact produced during pipeline execution.
+     *
+     * Callers need not check `enabled` before calling — the NoOp
+     * implementation is zero-cost, so unconditional calls are safe.
+     *
+     * @param step - Pipeline step name (e.g., "run-eval")
+     * @param type - Artifact type identifier (e.g., "eval-results")
+     * @param data - Content to serialize (JSON or text)
+     * @param meta - Optional metadata (variant, model, etc.)
+     */
+    capture(step: string, type: string, data: unknown, meta?: Record<string, unknown>): void;
+    /**
+     * Record a file reference for an artifact already on disk.
+     * The file is copied into the capture directory on flush().
+     *
+     * @param step - Pipeline step name
+     * @param type - Artifact type identifier
+     * @param filePath - Absolute path to the existing file
+     * @param meta - Optional metadata
+     */
+    captureFile(step: string, type: string, filePath: string, meta?: Record<string, unknown>): void;
+    /**
+     * Flush all captured artifacts to the configured destination.
+     * Called once at pipeline end by the orchestrator.
+     */
+    flush(): Promise<CaptureFlushResult>;
+    /** Whether capture is active */
+    readonly enabled: boolean;
+    /** Whether mode-specific extras are being captured */
+    readonly extrasEnabled: boolean;
+}
+/** Result of flushing captured artifacts to the destination. */
+export interface CaptureFlushResult {
+    /** Total number of artifacts captured */
+    artifactCount: number;
+    /** Output path (directory or .tar.gz) */
+    destination: string;
+    /** Total bytes written (uncompressed) */
+    totalBytes: number;
+    /** Whether output was compressed */
+    compressed: boolean;
+}
+/** A single entry in the capture manifest. */
+export interface ArtifactManifestEntry {
+    /** Pipeline step that produced this artifact */
+    step: string;
+    /** Artifact type identifier */
+    type: string;
+    /** Relative path within the capture directory */
+    path: string;
+    /** ISO 8601 timestamp of when capture() was called */
+    capturedAt: string;
+    /** Byte size of the artifact */
+    bytes: number;
+    /** Content format */
+    format: "json" | "markdown" | "text";
+    /** Optional metadata */
+    meta?: Record<string, unknown>;
+}
+/** The manifest.json written to each capture directory. */
+export interface ArtifactManifest {
+    version: 1;
+    captureId: string;
+    startedAt: string;
+    completedAt: string;
+    pipeline: {
+        mode: string;
+        variant?: string;
+        source?: string;
+        areas?: string[];
+    };
+    artifacts: ArtifactManifestEntry[];
+}

package/dist/_vendor/ailf-core/ports/artifact-collector.js ADDED Viewed

@@ -0,0 +1,13 @@
+/**
+ * Port: ArtifactCollector — captures pipeline artifacts during execution.
+ *
+ * Injected into AppContext. When capture is disabled (default), the
+ * composition root provides NoOpArtifactCollector. When --capture is
+ * set, provides FilesystemArtifactCollector.
+ *
+ * Design principles:
+ * - P1: Zero-cost when off (no-op stub)
+ * - P2: Capture, don't intercept (steps call capture() explicitly)
+ * - P5: Non-blocking (failures swallowed, never block the pipeline)
+ */
+export {};

package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts ADDED Viewed

@@ -0,0 +1,138 @@
+/**
+ * Types for cross-run capture comparison.
+ *
+ * The CaptureComparator reads two capture directories (baseline + experiment)
+ * and produces a CaptureDiffReport. Types are defined in core so external
+ * tooling can consume diff reports without depending on the eval package.
+ *
+ * Implementation lives in packages/eval/src/artifact-capture/comparator.ts.
+ */
+/** How deeply to compare artifacts. */
+export type ComparisonMode = "strict" | "structural" | "inventory";
+/** Configurable thresholds for comparison. */
+export interface ComparisonOptions {
+    /** Comparison depth: inventory (existence), structural (shape), strict (content) */
+    mode: ComparisonMode;
+    /** Score regression thresholds */
+    scoreThresholds?: {
+        /** Maximum allowed aggregate score delta (percentage points, default 5) */
+        aggregate: number;
+        /** Maximum allowed per-task score drop (points, default 10) */
+        perTask: number;
+    };
+    /** Timing regression thresholds */
+    timingThresholds?: {
+        /** Multiplier — flag steps exceeding this ratio (default 2.0) */
+        multiplier: number;
+        /** Per-step overrides (step name → custom multiplier) */
+        perStep?: Record<string, number>;
+    };
+    /** JSON structural diff depth (default 3) */
+    jsonDiffDepth?: number;
+    /** Additional ephemeral fields to ignore (merged with defaults) */
+    ephemeralFields?: string[];
+}
+/** Inventory diff — which artifacts exist in each capture. */
+export interface InventoryDiff {
+    /** Artifact types in experiment but not in baseline */
+    added: string[];
+    /** Artifact types in baseline but not in experiment */
+    removed: string[];
+    /** Artifact types present in both */
+    common: string[];
+}
+/** A single structural change in a JSON artifact. */
+export interface JsonDiffEntry {
+    /** JSON pointer path (e.g., "config.mode") */
+    path: string;
+    /** Value in baseline (undefined if key is added) */
+    baseline?: unknown;
+    /** Value in experiment (undefined if key is removed) */
+    experiment?: unknown;
+}
+/** Content diff for a single artifact. */
+export interface ArtifactContentDiff {
+    /** Artifact type identifier (step/type) */
+    artifactKey: string;
+    /** Content format */
+    format: "json" | "markdown" | "text";
+    /** Structural changes (JSON) or line diff summary (text/markdown) */
+    changes: JsonDiffEntry[] | {
+        addedLines: number;
+        removedLines: number;
+    };
+}
+/** Score comparison between two captures. */
+export interface ScoreComparison {
+    /** Baseline aggregate score */
+    baselineMean: number;
+    /** Experiment aggregate score */
+    currentMean: number;
+    /** Absolute delta (current - baseline) */
+    delta: number;
+    /** Per-task score deltas */
+    perTask: {
+        task: string;
+        baseline: number;
+        current: number;
+        delta: number;
+    }[];
+    /** Tasks that breached configured thresholds */
+    breaches: string[];
+}
+/** Timing comparison between two captures. */
+export interface TimingComparison {
+    /** Total pipeline duration delta in ms */
+    totalDeltaMs: number;
+    /** Per-step timing */
+    perStep: {
+        step: string;
+        baselineMs: number;
+        currentMs: number;
+        ratio: number;
+    }[];
+    /** Steps that breached the timing multiplier threshold */
+    breaches: string[];
+}
+/** Metadata comparison between two captures. */
+export interface MetadataComparison {
+    /** Whether pipeline modes match */
+    modeMatch: boolean;
+    /** Whether pipeline variants match */
+    variantMatch: boolean;
+    /** Config key differences */
+    configDiffs: JsonDiffEntry[];
+}
+/** Security scan results. */
+export interface SecurityScan {
+    /** Whether any potential secret leaks were found */
+    leaksFound: boolean;
+    /** Details of each violation */
+    violations: {
+        /** Relative artifact file path */
+        file: string;
+        /** Description of the finding */
+        detail: string;
+    }[];
+}
+/** The full diff report produced by CaptureComparator. */
+export interface CaptureDiffReport {
+    /** Are the two captures semantically equivalent? */
+    equivalent: boolean;
+    /** Human-readable summary (1-3 sentences) */
+    summary: string;
+    /** Comparison mode used */
+    mode: ComparisonMode;
+    /** Artifact inventory diff */
+    inventory: InventoryDiff;
+    /** Content diffs for common artifacts (structural/strict modes only) */
+    content?: ArtifactContentDiff[];
+    /** Score comparison (if score-summary exists in both captures) */
+    scores?: ScoreComparison;
+    /** Timing comparison (if pipeline-context exists in both captures) */
+    timing?: TimingComparison;
+    /** Metadata comparison */
+    metadata?: MetadataComparison;
+    /** Security scan results */
+    security: SecurityScan;
+}

package/dist/_vendor/ailf-core/ports/capture-comparator.js ADDED Viewed

@@ -0,0 +1,10 @@
+/**
+ * Types for cross-run capture comparison.
+ *
+ * The CaptureComparator reads two capture directories (baseline + experiment)
+ * and produces a CaptureDiffReport. Types are defined in core so external
+ * tooling can consume diff reports without depending on the eval package.
+ * This compiled module has no runtime exports (only `export {}` below).
+ * Implementation lives in packages/eval/src/artifact-capture/comparator.ts.
+ */
+export {};

package/dist/_vendor/ailf-core/ports/context.d.ts CHANGED Viewed

@@ -12,6 +12,7 @@
  * as downstream consumers are converted to use them.
  */
 import type { DebugOptions, EvalMode, PluginRegistry } from "../types/index.js";
+import type { ArtifactCollector } from "./artifact-collector.js";
 import type { CacheStore } from "./cache-store.js";
 import type { DocFetcher } from "./doc-fetcher.js";
 import type { EvalRunner } from "./eval-runner.js";
@@ -78,6 +79,8 @@ export interface ResolvedConfig {
     noRemoteCache: boolean;
     /** Grader replications for consistency measurement */
     graderReplications?: number;
+    /** Base directory for user-facing pipeline output artifacts. */
+    outputDir: string;
     /** Output path override */
     outputPath?: string;
     /** Doc source URL overrides */
@@ -90,6 +93,12 @@ export interface ResolvedConfig {
     searchMode: "off" | "open" | "origin-only";
     /** Eval concurrency */
     concurrency?: number;
+    /**
+     * Maximum wall-clock time per eval step in ms.
+     * When exceeded, the subprocess is killed and partial results are used.
+     * Sourced from models config `evalBudgetMs`.
+     */
+    evalBudgetMs?: number;
     /** Promptfoo URL from eval output */
     promptfooUrl?: string;
     /** Sanity dataset override */
@@ -109,7 +118,7 @@ export interface ResolvedConfig {
     /** Before option for comparison */
     beforeOption?: string;
     /** Task source adapter selection */
-    taskSourceType?: "content-lake" | "repo" | "yaml";
+    taskSourceType?: "content-lake" | "repo";
     /** Path to repo-based tasks directory (e.g., .ailf/tasks/) */
     repoTasksPath?: string;
     /** Report store project ID from .ailf/config.yaml reportStore block */
@@ -142,6 +151,14 @@ export interface ResolvedConfig {
     apiKey?: string;
     /** External preset file paths or npm package names to load */
     presets?: string[];
+    /** Whether artifact capture is enabled for this run (default: false) */
+    captureEnabled?: boolean;
+    /** Base directory for capture output (default: results/captures/) */
+    captureDir?: string;
+    /** Whether to compress capture output to tar.gz (default: true) */
+    captureCompress?: boolean;
+    /** Whether to include mode-specific extra artifacts (default: true) */
+    captureExtras?: boolean;
 }
 /**
  * Application context — the complete dependency carrier.
@@ -158,6 +175,8 @@ export interface ResolvedConfig {
 export interface AppContext {
     /** Evaluation caching (filesystem + optional Content Lake fallback) */
     readonly cache?: CacheStore;
+    /** Artifact capture collector (no-op when --capture is not set) */
+    readonly collector: ArtifactCollector;
     /** Resolved pipeline configuration */
     readonly config: ResolvedConfig;
     /** Documentation context fetcher */

package/dist/_vendor/ailf-core/ports/eval-runner.d.ts CHANGED Viewed

@@ -15,6 +15,12 @@ export interface EvalRunConfig {
     concurrency?: number;
     /** Environment variables to pass to the eval process */
     env?: Record<string, string>;
+    /**
+     * Maximum wall-clock time for this eval subprocess in ms.
+     * When exceeded, the process is killed and partial results are used.
+     * Default: no limit (backward compatible).
+     */
+    maxDurationMs?: number;
 }
 export interface EvalRunner {
     /** Run an evaluation and return the step result */

package/dist/_vendor/ailf-core/ports/index.d.ts CHANGED Viewed

@@ -4,6 +4,8 @@
  * Ports define the contracts between the domain kernel and the outside world.
  * Adapters (in packages/eval) implement these interfaces.
  */
+export type { ArtifactCollector, ArtifactManifest, ArtifactManifestEntry, CaptureFlushResult, } from "./artifact-collector.js";
+export type { ArtifactContentDiff, CaptureDiffReport, ComparisonMode, ComparisonOptions, InventoryDiff, JsonDiffEntry, MetadataComparison, ScoreComparison, SecurityScan, TimingComparison, } from "./capture-comparator.js";
 export type { CacheEntryMetadata, CacheKey, CacheLookupResult, CacheRecordInput, CacheStore, } from "./cache-store.js";
 export type { ConfigSource } from "./config-source.js";
 export type { AppContext, ReportSinkPort, ReportStorePort, ResolvedConfig, } from "./context.js";

package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts CHANGED Viewed

@@ -38,6 +38,17 @@ export interface PipelineStep {
      * When defined, the StepRunner computes a hash and checks the cache.
      */
     cacheInputs?(ctx: AppContext): string[];
+    /**
+     * Cache context strings — non-file state that participates in cache key
+     * computation (e.g., mode, variant, area/task/tag filters).
+     *
+     * Without these, two runs with different CLI flags but identical config
+     * files would share a cache entry, causing cross-mode or cross-area
+     * contamination.
+     *
+     * When undefined, only file content determines the cache key.
+     */
+    cacheContext?(ctx: AppContext): string[];
     /**
      * Whether this step is optional — a failure in an optional step
      * does not stop the pipeline.

package/dist/_vendor/ailf-core/ports/task-source.d.ts CHANGED Viewed

@@ -2,9 +2,9 @@
  * Port: Where task definitions come from.
  *
  * Adapters:
- * - YamlTaskSource (current) — reads tasks/*.yaml files
- * - ContentLakeTaskSource (tasks-as-content Phase 2) — GROQ query
- * - RepoTaskSource (tasks-as-content Phase 4) — reads .ailf/tasks/
+ * - ContentLakeTaskSource — GROQ query against Sanity Content Lake
+ * - RepoTaskSource — reads .ailf/tasks/*.task.ts files
+ * - TsTaskFileLoader — reads tasks/{mode}/*.task.ts (eval package)
  *
  * The key invariant: the pipeline orchestrator and all downstream steps
  * work with GeneralizedTaskDefinition[] regardless of where they came from.

package/dist/_vendor/ailf-core/ports/task-source.js CHANGED Viewed

@@ -2,9 +2,9 @@
  * Port: Where task definitions come from.
  *
  * Adapters:
- * - YamlTaskSource (current) — reads tasks/*.yaml files
- * - ContentLakeTaskSource (tasks-as-content Phase 2) — GROQ query
- * - RepoTaskSource (tasks-as-content Phase 4) — reads .ailf/tasks/
+ * - ContentLakeTaskSource — GROQ query against Sanity Content Lake
+ * - RepoTaskSource — reads .ailf/tasks/*.task.ts files
+ * - TsTaskFileLoader — reads tasks/{mode}/*.task.ts (eval package)
  *
  * The key invariant: the pipeline orchestrator and all downstream steps
  * work with GeneralizedTaskDefinition[] regardless of where they came from.

package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts CHANGED Viewed

@@ -77,10 +77,15 @@ export declare const PipelineRequestSchema: z.ZodObject<{
     taskMode: z.ZodOptional<z.ZodEnum<{
         inline: "inline";
         "content-lake": "content-lake";
-        yaml: "yaml";
     }>>;
     tasks: z.ZodOptional<z.ZodArray<z.ZodString>>;
     urls: z.ZodOptional<z.ZodArray<z.ZodString>>;
+    variant: z.ZodOptional<z.ZodEnum<{
+        baseline: "baseline";
+        agentic: "agentic";
+        observed: "observed";
+        full: "full";
+    }>>;
     presets: z.ZodOptional<z.ZodArray<z.ZodString>>;
 }, z.core.$strip>;
 /** Inferred TypeScript type for a pipeline request payload. */

package/dist/_vendor/ailf-core/schemas/pipeline-request.js CHANGED Viewed

@@ -13,7 +13,7 @@
  * @see packages/eval/src/pipeline/map-request-to-config.ts — maps to ResolvedConfig
  */
 import { z } from "zod";
-import { RAW_EVAL_MODES } from "../../ailf-shared/index.js";
+import { LITERACY_VARIANTS, RAW_EVAL_MODES } from "../../ailf-shared/index.js";
 // ---------------------------------------------------------------------------
 // Debug options — boolean shorthand or structured object
 // ---------------------------------------------------------------------------
@@ -86,9 +86,21 @@ export const PipelineRequestSchema = z.object({
     searchMode: z.enum(["off", "open", "origin-only"]).optional(),
     source: z.string().optional(),
     sourceReportId: z.string().optional(),
-    taskMode: z.enum(["content-lake", "yaml", "inline"]).optional(),
+    taskMode: z.enum(["content-lake", "inline"]).optional(),
     tasks: z.array(z.string()).optional(),
     urls: z.array(z.string().url()).optional(),
+    /**
+     * Literacy variant — only meaningful when mode is "literacy".
+     *
+     * When provided with a canonical mode (`mode: "literacy"`), this field
+     * specifies the variant directly. When mode is a legacy alias (e.g.,
+     * `mode: "baseline"`), the variant is derived from the mode name and
+     * this field is ignored.
+     *
+     * Prefer explicit `mode: "literacy", variant: "baseline"` over the
+     * legacy `mode: "baseline"` form.
+     */
+    variant: z.enum(LITERACY_VARIANTS).optional(),
     /** External preset file paths or npm package names to load */
     presets: z.array(z.string()).optional(),
 });

package/dist/_vendor/ailf-core/services/config-helpers.d.ts CHANGED Viewed

@@ -6,7 +6,9 @@
  * Extracted from packages/eval/src/lib/generate-configs.ts during
  * the Ports & Adapters migration (Phase 4e).
  */
+import type { EvalMode } from "../../ailf-shared/index.d.ts";
 import type { ModelEntry } from "../types/index.js";
+import type { ModeBase } from "../types/plugin-registry.js";
 /**
  * Extract the raw API model name from a Promptfoo provider ID.
  *
@@ -38,4 +40,17 @@ export declare function mergeConfig(defaults: Record<string, unknown>, modelConf
  *
  * Models without a `modes` field match all modes.
  */
-export declare function modelMatchesMode(model: ModelEntry, mode: string): boolean;
+export declare function modelMatchesMode(model: ModelEntry, mode: EvalMode): boolean;
+/**
+ * Resolve which variants a model participates in for a given mode.
+ *
+ * Resolution rules, checked in this order:
+ * - Mode defines no variants (missing or empty list) → `undefined` (no variant filtering)
+ * - Model lists variants for this mode → that whitelist, returned as-is: it is not
+ *   checked against the mode's variant IDs, and an empty array stays `[]`
+ * - Model has no entry for this mode → ALL of the mode's variant IDs (default)
+ * @param model  - The model entry from models config
+ * @param modeBase - The mode base (contains variant definitions)
+ * @returns Array of variant IDs, or `undefined` if the mode has no variants
+ */
+export declare function resolveModelVariants(model: ModelEntry, modeBase: ModeBase): string[] | undefined;

package/dist/_vendor/ailf-core/services/config-helpers.js CHANGED Viewed

@@ -84,3 +84,24 @@ export function modelMatchesMode(model, mode) {
     }
     return model.modes.includes(mode);
 }
+/**
+ * Resolve which variants a model participates in for a given mode.
+ *
+ * Resolution rules, checked in this order:
+ * - Mode defines no variants (missing or empty list) → `undefined` (no variant filtering)
+ * - Model lists variants for this mode → that whitelist, returned as-is: it is not
+ *   checked against the mode's variant IDs, and an empty array stays `[]`
+ * - Model has no entry for this mode → ALL of the mode's variant IDs (default)
+ * @param model  - The model entry from models config
+ * @param modeBase - The mode base (contains variant definitions)
+ * @returns Array of variant IDs, or `undefined` if the mode has no variants
+ */
+export function resolveModelVariants(model, modeBase) {
+    const modeVariants = modeBase.mode.variants;
+    if (!modeVariants || modeVariants.length === 0)
+        return undefined;
+    const allVariantIds = modeVariants.map((v) => v.id);
+    const modeId = modeBase.mode.id;
+    const explicit = model.variants?.[modeId];
+    return explicit ?? allVariantIds; // `??` falls back only on null/undefined, so an explicit [] is kept
+}

package/dist/_vendor/ailf-core/services/index.d.ts CHANGED Viewed

@@ -10,4 +10,4 @@
 export { classifyRubric, detectFeatureArea, extractDimensions, extractUrlMetadata, mergeScores, parseRubricScore, } from "./scoring.js";
 export { formatComparisonMarkdown, formatComparisonTable, } from "./comparison-formatters.js";
 export { aggregateAreas, aggregateDimensions, computeEnsembleScore, computeTaskScore, normalizeScore, type AggregationStrategy, type AreaScore, type AssertionScore, type DimensionScore, type EnsembleGradingConfig, type GraderTransitionConfig, type TaskScore, type TaskScoreOptions, } from "./scoring-engine.js";
-export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, } from "./config-helpers.js";
+export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, resolveModelVariants, } from "./config-helpers.js";

package/dist/_vendor/ailf-core/services/index.js CHANGED Viewed

@@ -10,4 +10,4 @@
 export { classifyRubric, detectFeatureArea, extractDimensions, extractUrlMetadata, mergeScores, parseRubricScore, } from "./scoring.js";
 export { formatComparisonMarkdown, formatComparisonTable, } from "./comparison-formatters.js";
 export { aggregateAreas, aggregateDimensions, computeEnsembleScore, computeTaskScore, normalizeScore, } from "./scoring-engine.js";
-export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, } from "./config-helpers.js";
+export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, resolveModelVariants, } from "./config-helpers.js";

package/dist/_vendor/ailf-core/services/scoring.js CHANGED Viewed

@@ -65,6 +65,15 @@ export function classifyRubric(component) {
  */
 export function detectFeatureArea(description) {
     const desc = description.toLowerCase();
+    if (desc.includes("portable text")) {
+        return "portable-text";
+    }
+    if (desc.includes("content lake")) {
+        return "content-lake";
+    }
+    if (desc.includes("image handling") || desc.includes("image asset")) {
+        return "image-handling";
+    }
     if (desc.includes("studio")) {
         return "studio-setup";
     }

package/dist/_vendor/ailf-core/types/generalized-task.d.ts CHANGED Viewed

@@ -8,7 +8,7 @@
  *
  * @see docs/design-docs/architecture-overhaul/domain-model.md (canonical)
  * @see docs/design-docs/architecture-overhaul/test-definition.md (authoring surfaces)
- * @see docs/exec-plans/architecture-overhaul/phase-0-foundation-types.md (task 0h)
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-0-foundation-types.md (task 0h)
  */
 /** Difficulty level for a task */
 export type TaskDifficulty = "basic" | "intermediate" | "advanced";
@@ -178,6 +178,17 @@ export interface MCPServerTaskDefinition extends TaskCommonFields {
         url?: string;
         /** Environment variables for the server process */
         env?: Record<string, string>;
+        /**
+         * HTTP headers for remote transports (sse / streamable-http).
+         * Merged on top of any auth-derived headers, so explicit values
+         * here take precedence over `auth`-generated headers.
+         *
+         * Values support `{{env.VAR}}` template syntax for secrets.
+         *
+         * @example
+         * headers: { Authorization: "Bearer {{env.SANITY_API_TOKEN}}" }
+         */
+        headers?: Record<string, string>;
         /** Startup timeout in milliseconds */
         startupTimeoutMs?: number;
         /**

package/dist/_vendor/ailf-core/types/generalized-task.js CHANGED Viewed

@@ -8,6 +8,6 @@
  *
  * @see docs/design-docs/architecture-overhaul/domain-model.md (canonical)
  * @see docs/design-docs/architecture-overhaul/test-definition.md (authoring surfaces)
- * @see docs/exec-plans/architecture-overhaul/phase-0-foundation-types.md (task 0h)
+ * @see docs/archive/exec-plans/architecture-overhaul/phase-0-foundation-types.md (task 0h)
  */
 export {};

package/dist/_vendor/ailf-core/types/index.d.ts CHANGED Viewed

@@ -9,7 +9,7 @@
  * Ports & Adapters migration (Phase 0c). The original file is now a
  * re-export barrel that preserves backward compatibility.
  */
-import type { DocumentRef as _DocumentRef, EvalMode as _EvalMode } from "../../ailf-shared/index.d.ts";
+import type { DocumentRef as _DocumentRef, EvalMode } from "../../ailf-shared/index.d.ts";
 export type { ActualScoreEntry, ComponentResult, TestResult, UrlMetadata, } from "./scoring-input.js";
 export type { DocumentRef } from "../../ailf-shared/index.d.ts";
 export type { StoredBaseline, StoredReport, StoredRun, StoredTaskResult, StoredTrace, SchemaVersioned, } from "./storage-schema.js";
@@ -25,7 +25,6 @@ export type { ArtifactId, Brand, Err, FixtureId, IdValidationError, NewReportId,
 export { err, fixtureId, ok, providerId, resultId, runId, suiteId, taskId, traceId, } from "./branded-ids.js";
 export type { AgentHarnessTaskDefinition, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./generalized-task.js";
 type DocumentRef = _DocumentRef;
-type EvalMode = _EvalMode;
 /** Aggregated retrieval metrics for a feature area */
 export interface AreaRetrievalMetrics {
     area: string;
@@ -119,7 +118,7 @@ export interface FailureModeReport {
     totalJudgments: number;
 }
 /** Failure mode classification for a low-scoring judgment */
-export type FailureModeType = "incorrect-docs" | "missing-docs" | "model-limitation" | "outdated-docs" | "poor-structure" | "unclassified";
+export type FailureModeType = "api-error" | "incorrect-docs" | "missing-docs" | "model-limitation" | "outdated-docs" | "poor-structure" | "unclassified";
 /** Per-feature-area score breakdown */
 export interface FeatureScore {
     /**
@@ -352,11 +351,40 @@ export interface ModelEntry {
     env?: string;
     id: string;
     label: string;
-    modes?: string[];
+    /**
+     * Which evaluation modes this model participates in.
+     *
+     * Values must be canonical eval mode names (e.g., "literacy", "mcp-server").
+     * When omitted, the model participates in all modes.
+     */
+    modes?: EvalMode[];
+    /**
+     * Per-provider timeout in ms. Emitted into Promptfoo provider config.
+     * Default: 300_000 (5 min, matching Promptfoo's built-in default).
+     */
+    timeoutMs?: number;
+    /**
+     * Per-mode variant whitelist. Keys are eval mode IDs, values are arrays
+     * of variant IDs to include for that mode.
+     *
+     * When a model enrolls in a mode (via `modes`) but does not specify
+     * variants for it here, ALL variants defined by the mode base are included.
+     *
+     * Only meaningful for modes that define variants (e.g., literacy has
+     * "baseline", "observed", "agentic-naive", "agentic-optimized").
+     * Ignored for modes without variants.
+     */
+    variants?: Partial<Record<EvalMode, string[]>>;
 }
 /** Parsed config/models.yaml structure */
 export interface ModelsConfig {
     defaults: Record<string, unknown>;
+    /**
+     * Maximum wall-clock time per eval step (all tests for one mode) in ms.
+     * When exceeded, the subprocess is killed and partial results are used.
+     * Default: no limit (backward compatible).
+     */
+    evalBudgetMs?: number;
     grader: {
         id: string;
         label?: string;
@@ -507,6 +535,21 @@ export interface TestSummary {
         task: string;
         error: string;
     }[];
+    /** Per-test timing statistics (when latencyMs is available from Promptfoo) */
+    timing?: {
+        /** Median test duration in ms */
+        medianMs: number;
+        /** 95th percentile test duration in ms */
+        p95Ms: number;
+        /** Maximum test duration in ms */
+        maxMs: number;
+        /** Tests that exceeded the slow threshold (2x median, min 60s) */
+        slowTests: {
+            task: string;
+            model: string;
+            durationMs: number;
+        }[];
+    };
 }
 /** Token usage and estimated cost for a pipeline run. */
 export interface PipelineUsage {