@sanity/ailf 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +28 -23
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
- package/dist/_vendor/ailf-core/config-helpers.js +29 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
- package/dist/_vendor/ailf-core/examples/index.js +208 -114
- package/dist/_vendor/ailf-core/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/index.js +1 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
- package/dist/_vendor/ailf-tasks/cli.js +61 -0
- package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
- package/dist/_vendor/ailf-tasks/index.js +16 -0
- package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
- package/dist/_vendor/ailf-tasks/parser.js +73 -0
- package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
- package/dist/_vendor/ailf-tasks/schemas.js +180 -0
- package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
- package/dist/_vendor/ailf-tasks/validation.js +162 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +6 -1
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
- package/dist/adapters/task-sources/index.d.ts +1 -2
- package/dist/adapters/task-sources/index.js +1 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
- package/dist/adapters/task-sources/repo-schemas.js +2 -2
- package/dist/adapters/task-sources/repo-task-source.js +1 -1
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
- package/dist/adapters/task-sources/task-file-loader.js +20 -6
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +2 -3
- package/dist/commands/init.js +56 -170
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/composition-root.d.ts +2 -3
- package/dist/composition-root.js +27 -14
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +30 -16
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +50 -15
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +52 -32
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/checks.d.ts +8 -3
- package/dist/pipeline/checks.js +23 -3
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
- package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
- package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +15 -8
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +15 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/dist/pipeline/mirror-repo-tasks.js +1 -1
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +67 -29
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +25 -25
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -14,6 +14,25 @@ import type { PromptTemplate } from "../ports/mode-handler.js";
|
|
|
14
14
|
import type { DocFetcher } from "../ports/doc-fetcher.js";
|
|
15
15
|
import type { SourceEntry } from "../config-helpers.js";
|
|
16
16
|
import type { FeatureRegistry } from "../schemas/pipeline.js";
|
|
17
|
+
/**
|
|
18
|
+
* A named variant within an evaluation mode.
|
|
19
|
+
*
|
|
20
|
+
* Modes can declare variants to represent different evaluation strategies
|
|
21
|
+
* that share the same methodology. For example, the literacy mode has
|
|
22
|
+
* "baseline", "observed", and "agentic-*" variants.
|
|
23
|
+
*
|
|
24
|
+
* Models opt into specific variants via `ModelEntry.variants`. When a model
|
|
25
|
+
* enrolls in a mode without specifying variants, all defined variants are
|
|
26
|
+
* included by default.
|
|
27
|
+
*/
|
|
28
|
+
export interface ModeVariantDefinition {
|
|
29
|
+
/** Variant identifier (e.g., "baseline", "agentic-naive") */
|
|
30
|
+
id: string;
|
|
31
|
+
/** Human-readable label (e.g., "Standard (baseline)") */
|
|
32
|
+
label: string;
|
|
33
|
+
/** Optional description for docs/CLI help */
|
|
34
|
+
description?: string;
|
|
35
|
+
}
|
|
17
36
|
/** A registered evaluation mode handler */
|
|
18
37
|
export interface ModeRegistration {
|
|
19
38
|
/** Unique mode identifier (e.g., "api-contract") */
|
|
@@ -26,6 +45,14 @@ export interface ModeRegistration {
|
|
|
26
45
|
rubricTemplateIds: string[];
|
|
27
46
|
/** Compile function module path (loaded at runtime) */
|
|
28
47
|
handlerModule: string;
|
|
48
|
+
/**
|
|
49
|
+
* Variants this mode supports. Omit or empty for modes without variants.
|
|
50
|
+
*
|
|
51
|
+
* When defined, models can selectively opt into specific variants via
|
|
52
|
+
* `ModelEntry.variants`. Models that enroll in the mode without specifying
|
|
53
|
+
* variants participate in all defined variants.
|
|
54
|
+
*/
|
|
55
|
+
variants?: ModeVariantDefinition[];
|
|
29
56
|
}
|
|
30
57
|
/** A registered assertion type */
|
|
31
58
|
export interface AssertionRegistration {
|
|
@@ -37,6 +37,21 @@ export type RawEvalMode = EvalMode | "agentic" | "baseline" | "full" | "observed
|
|
|
37
37
|
export declare const CANONICAL_EVAL_MODES: readonly ["literacy", "mcp-server", "agent-harness", "knowledge-probe", "custom"];
|
|
38
38
|
/** Legacy CLI aliases that map to `mode: "literacy"` + variant. */
|
|
39
39
|
export declare const LEGACY_EVAL_MODE_ALIASES: readonly ["baseline", "agentic", "observed", "full"];
|
|
40
|
+
/**
|
|
41
|
+
* Literacy mode variant names — each is a distinct evaluation strategy.
|
|
42
|
+
*
|
|
43
|
+
* These are the valid values for the `variant` field in PipelineRequest
|
|
44
|
+
* when `mode` is `"literacy"`. They match LEGACY_EVAL_MODE_ALIASES because
|
|
45
|
+
* variants were originally exposed as top-level mode names.
|
|
46
|
+
*
|
|
47
|
+
* - `baseline` — with-docs / without-docs comparison (gold + floor)
|
|
48
|
+
* - `agentic` — model uses tools to find docs (gold only)
|
|
49
|
+
* - `observed` — HTTP-instrumented behavior observation
|
|
50
|
+
* - `full` — combined baseline + agentic
|
|
51
|
+
*/
|
|
52
|
+
export declare const LITERACY_VARIANTS: readonly ["baseline", "agentic", "observed", "full"];
|
|
53
|
+
/** Union of all literacy variant string values. */
|
|
54
|
+
export type LiteracyVariant = (typeof LITERACY_VARIANTS)[number];
|
|
40
55
|
/**
|
|
41
56
|
* All accepted mode names for Zod enum construction.
|
|
42
57
|
* Canonical modes first, then legacy aliases.
|
|
@@ -22,6 +22,24 @@ export const LEGACY_EVAL_MODE_ALIASES = [
|
|
|
22
22
|
"observed",
|
|
23
23
|
"full",
|
|
24
24
|
];
|
|
25
|
+
/**
|
|
26
|
+
* Literacy mode variant names — each is a distinct evaluation strategy.
|
|
27
|
+
*
|
|
28
|
+
* These are the valid values for the `variant` field in PipelineRequest
|
|
29
|
+
* when `mode` is `"literacy"`. They match LEGACY_EVAL_MODE_ALIASES because
|
|
30
|
+
* variants were originally exposed as top-level mode names.
|
|
31
|
+
*
|
|
32
|
+
* - `baseline` — with-docs / without-docs comparison (gold + floor)
|
|
33
|
+
* - `agentic` — model uses tools to find docs (gold only)
|
|
34
|
+
* - `observed` — HTTP-instrumented behavior observation
|
|
35
|
+
* - `full` — combined baseline + agentic
|
|
36
|
+
*/
|
|
37
|
+
export const LITERACY_VARIANTS = [
|
|
38
|
+
"baseline",
|
|
39
|
+
"agentic",
|
|
40
|
+
"observed",
|
|
41
|
+
"full",
|
|
42
|
+
];
|
|
25
43
|
/**
|
|
26
44
|
* All accepted mode names for Zod enum construction.
|
|
27
45
|
* Canonical modes first, then legacy aliases.
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* cli.ts — Minimal CLI for standalone task validation.
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* npx @sanity/ailf-tasks validate .ailf/tasks/
|
|
6
|
+
* npx @sanity/ailf-tasks validate # defaults to .ailf/tasks/
|
|
7
|
+
*/
|
|
8
|
+
import { loadTaskDir } from "./parser.js";
|
|
9
|
+
import { formatValidationResult, validateRepoTasks } from "./validation.js";
|
|
10
|
+
export function run() {
|
|
11
|
+
const args = process.argv.slice(2);
|
|
12
|
+
const command = args[0];
|
|
13
|
+
if (command === "validate") {
|
|
14
|
+
const dir = args[1] ?? ".ailf/tasks";
|
|
15
|
+
validateCommand(dir);
|
|
16
|
+
}
|
|
17
|
+
else if (command === "--help" ||
|
|
18
|
+
command === "-h" ||
|
|
19
|
+
command === undefined) {
|
|
20
|
+
printUsage();
|
|
21
|
+
}
|
|
22
|
+
else {
|
|
23
|
+
console.error(`Unknown command: ${command}`);
|
|
24
|
+
printUsage();
|
|
25
|
+
process.exit(1);
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
function validateCommand(dir) {
|
|
29
|
+
try {
|
|
30
|
+
const tasks = loadTaskDir(dir);
|
|
31
|
+
// Run semantic validation
|
|
32
|
+
const result = validateRepoTasks(tasks);
|
|
33
|
+
const formatted = formatValidationResult(result);
|
|
34
|
+
console.log(`✅ ${tasks.length} task(s) validated from ${dir}`);
|
|
35
|
+
for (const task of tasks) {
|
|
36
|
+
console.log(` ${task.id} — ${task.description}`);
|
|
37
|
+
}
|
|
38
|
+
if (result.warnings.length > 0 || result.errors.length > 0) {
|
|
39
|
+
console.log("");
|
|
40
|
+
console.log(formatted);
|
|
41
|
+
}
|
|
42
|
+
if (!result.valid) {
|
|
43
|
+
process.exit(1);
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
catch (err) {
|
|
47
|
+
console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);
|
|
48
|
+
process.exit(1);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
function printUsage() {
|
|
52
|
+
console.log("Usage: ailf-tasks <command> [options]");
|
|
53
|
+
console.log("");
|
|
54
|
+
console.log("Commands:");
|
|
55
|
+
console.log(" validate [dir] Validate task YAML files (default: .ailf/tasks/)");
|
|
56
|
+
console.log("");
|
|
57
|
+
console.log("Examples:");
|
|
58
|
+
console.log(" ailf-tasks validate");
|
|
59
|
+
console.log(" ailf-tasks validate .ailf/tasks/");
|
|
60
|
+
console.log(" ailf-tasks validate /path/to/tasks/");
|
|
61
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @sanity/ailf-tasks — Task definition schemas and YAML parser.
|
|
3
|
+
*
|
|
4
|
+
* Lightweight package for parsing and validating .ailf/tasks/*.yaml files
|
|
5
|
+
* without depending on the full AILF CLI or its heavyweight dependencies
|
|
6
|
+
* (Promptfoo, LLM SDKs, Sanity client).
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* import { parseTaskFile, loadTaskDir, RepoTaskSchema } from '@sanity/ailf-tasks'
|
|
10
|
+
*/
|
|
11
|
+
export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, type CuratedAssertionType, type RepoTask, type RubricTemplateName, } from "./schemas.js";
|
|
12
|
+
export { loadTaskDir, parseTaskFile } from "./parser.js";
|
|
13
|
+
export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, type ValidationMessage, type ValidationResult, } from "./validation.js";
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @sanity/ailf-tasks — Task definition schemas and YAML parser.
|
|
3
|
+
*
|
|
4
|
+
* Lightweight package for parsing and validating .ailf/tasks/*.yaml files
|
|
5
|
+
* without depending on the full AILF CLI or its heavyweight dependencies
|
|
6
|
+
* (Promptfoo, LLM SDKs, Sanity client).
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* import { parseTaskFile, loadTaskDir, RepoTaskSchema } from '@sanity/ailf-tasks'
|
|
10
|
+
*/
|
|
11
|
+
// Schemas and types
|
|
12
|
+
export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, } from "./schemas.js";
|
|
13
|
+
// Parsing
|
|
14
|
+
export { loadTaskDir, parseTaskFile } from "./parser.js";
|
|
15
|
+
// Validation
|
|
16
|
+
export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, } from "./validation.js";
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* parser.ts — Standalone task file and directory parsing.
|
|
3
|
+
*
|
|
4
|
+
* High-level functions for loading and validating .ailf/tasks/ YAML
|
|
5
|
+
* files without any dependency on the eval pipeline.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
|
|
9
|
+
*/
|
|
10
|
+
import { type RepoTask } from "./schemas.js";
|
|
11
|
+
/**
|
|
12
|
+
* Parse a single task YAML string and return validated tasks.
|
|
13
|
+
*
|
|
14
|
+
* @param content - Raw YAML string content
|
|
15
|
+
* @param filename - Source filename (for error messages)
|
|
16
|
+
* @returns Validated array of RepoTask objects
|
|
17
|
+
* @throws Error if YAML parsing or Zod validation fails
|
|
18
|
+
*/
|
|
19
|
+
export declare function parseTaskFile(content: string, filename?: string): RepoTask[];
|
|
20
|
+
/**
|
|
21
|
+
* Load and parse all task YAML files from a directory.
|
|
22
|
+
*
|
|
23
|
+
* @param dirPath - Path to directory containing .yaml/.yml files
|
|
24
|
+
* @returns All validated tasks, sorted by filename
|
|
25
|
+
* @throws Error if directory not found, no YAML files, or validation fails
|
|
26
|
+
*/
|
|
27
|
+
export declare function loadTaskDir(dirPath: string): RepoTask[];
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* parser.ts — Standalone task file and directory parsing.
|
|
3
|
+
*
|
|
4
|
+
* High-level functions for loading and validating .ailf/tasks/ YAML
|
|
5
|
+
* files without any dependency on the eval pipeline.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
|
|
9
|
+
*/
|
|
10
|
+
import { existsSync, readdirSync, readFileSync } from "fs";
|
|
11
|
+
import { resolve } from "path";
|
|
12
|
+
import { load } from "js-yaml";
|
|
13
|
+
import { RepoTaskFileSchema } from "./schemas.js";
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// Public API
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
/**
 * Turn the YAML text of one task file into a validated list of tasks.
 *
 * The text is parsed with js-yaml's `load`, must yield an array, and is then
 * checked against `RepoTaskFileSchema`. Schema failures are collected into a
 * single error whose message lists each issue as ` [path]: message`.
 *
 * @param content - Raw YAML string content
 * @param filename - Source filename, used only in error messages
 * @returns The schema's parsed output (array of tasks)
 * @throws Error if the YAML does not parse to an array or fails the schema;
 *   errors thrown by `load` itself propagate unchanged
 */
export function parseTaskFile(content, filename = "<string>") {
    const document = load(content);
    if (!Array.isArray(document)) {
        const headline = `${filename} did not parse to an array of tasks. `;
        throw new Error(headline + "Task files must contain a YAML array of task definitions.");
    }
    const outcome = RepoTaskFileSchema.safeParse(document);
    if (outcome.success) {
        return outcome.data;
    }
    const issueLines = [];
    for (const issue of outcome.error.issues) {
        issueLines.push(` [${issue.path.join(".")}]: ${issue.message}`);
    }
    throw new Error(`Invalid task file "${filename}":\n${issueLines.join("\n")}`);
}
|
|
40
|
+
/**
 * Load and parse all task YAML files from a directory.
 *
 * Only direct children whose names end in `.yaml`/`.yml` and do not start
 * with a dot are considered; sub-directories are skipped even if their name
 * looks like a YAML file. Files are processed in sorted filename order and
 * their tasks are concatenated in that order.
 *
 * @param dirPath - Path to directory containing .yaml/.yml files
 * @returns All validated tasks, in sorted-filename order
 * @throws Error if the path does not exist or is not a directory, if it
 *   contains no YAML files, or if reading/parsing any file fails (the
 *   original error is attached as `cause`)
 */
export function loadTaskDir(dirPath) {
    if (!existsSync(dirPath)) {
        throw new Error(`Tasks directory not found: ${dirPath}\n` +
            " Expected a directory containing .ailf/tasks/*.yaml files.");
    }
    let entries;
    try {
        entries = readdirSync(dirPath, { withFileTypes: true });
    }
    catch (err) {
        // existsSync() is also true for regular files; turn the raw ENOTDIR
        // into the same kind of actionable message as the not-found case.
        if (err instanceof Error && err.code === "ENOTDIR") {
            throw new Error(`Tasks path is not a directory: ${dirPath}\n` +
                " Expected a directory containing .ailf/tasks/*.yaml files.", { cause: err });
        }
        throw err;
    }
    const yamlFiles = entries
        // A directory named e.g. "foo.yaml" is not a task file.
        .filter((entry) => !entry.isDirectory())
        .map((entry) => entry.name)
        .filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."))
        .sort();
    if (yamlFiles.length === 0) {
        throw new Error(`No YAML files found in ${dirPath}\n` +
            " Expected .ailf/tasks/*.yaml files with task definitions.");
    }
    const allTasks = [];
    for (const file of yamlFiles) {
        const filePath = resolve(dirPath, file);
        try {
            // Read inside the try so I/O failures are reported with the
            // offending filename, just like parse/validation failures.
            const content = readFileSync(filePath, "utf-8");
            const tasks = parseTaskFile(content, file);
            allTasks.push(...tasks);
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            throw new Error(`Failed to load ${file}:\n${msg}`, { cause: err });
        }
    }
    return allTasks;
}
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* schemas.ts — Zod schemas for repo-based task definitions.
|
|
3
|
+
*
|
|
4
|
+
* Validates .ailf/tasks/*.yaml task files from external repositories.
|
|
5
|
+
* These schemas are the contract between external repos and the AILF eval
|
|
6
|
+
* pipeline — they define exactly what fields are accepted, with friendly
|
|
7
|
+
* error messages for authors writing task YAML by hand.
|
|
8
|
+
*
|
|
9
|
+
* This module is the single source of truth for task schemas. The eval
|
|
10
|
+
* package re-exports from here to avoid duplication.
|
|
11
|
+
*
|
|
12
|
+
* @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
13
|
+
*/
|
|
14
|
+
import { z } from "zod";
|
|
15
|
+
/**
|
|
16
|
+
* The set of assertion types allowed in repo-based task files.
|
|
17
|
+
*
|
|
18
|
+
* This is a curated subset of Promptfoo assertion types — we expose only the
|
|
19
|
+
* types that are stable, well-documented, and useful for external authors.
|
|
20
|
+
*/
|
|
21
|
+
export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency"];
|
|
22
|
+
export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
|
|
23
|
+
/**
|
|
24
|
+
* Valid rubric template names — must match keys in config/rubrics.yaml.
|
|
25
|
+
*/
|
|
26
|
+
export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage"];
|
|
27
|
+
export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
|
|
28
|
+
/**
|
|
29
|
+
* Zod schema for a single repo-based task definition.
|
|
30
|
+
*
|
|
31
|
+
* This is the external-author-facing contract. Field names are camelCase
|
|
32
|
+
* to match the Content Lake document schema (ailf.task).
|
|
33
|
+
*/
|
|
34
|
+
export declare const RepoTaskSchema: z.ZodObject<{
|
|
35
|
+
id: z.ZodString;
|
|
36
|
+
description: z.ZodString;
|
|
37
|
+
status: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
38
|
+
active: "active";
|
|
39
|
+
draft: "draft";
|
|
40
|
+
paused: "paused";
|
|
41
|
+
archived: "archived";
|
|
42
|
+
}>>>;
|
|
43
|
+
featureArea: z.ZodString;
|
|
44
|
+
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
45
|
+
canonicalDocs: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
46
|
+
id: z.ZodString;
|
|
47
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
48
|
+
slug: z.ZodOptional<z.ZodString>;
|
|
49
|
+
path: z.ZodOptional<z.ZodString>;
|
|
50
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
51
|
+
slug: z.ZodString;
|
|
52
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
53
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
54
|
+
path: z.ZodString;
|
|
55
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
56
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
57
|
+
perspective: z.ZodString;
|
|
58
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
59
|
+
}, z.core.$strip>]>>>>;
|
|
60
|
+
vars: z.ZodOptional<z.ZodObject<{
|
|
61
|
+
task: z.ZodString;
|
|
62
|
+
}, z.core.$loose>>;
|
|
63
|
+
assert: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
64
|
+
type: z.ZodLiteral<"llm-rubric">;
|
|
65
|
+
template: z.ZodEnum<{
|
|
66
|
+
"task-completion": "task-completion";
|
|
67
|
+
"code-correctness": "code-correctness";
|
|
68
|
+
"doc-coverage": "doc-coverage";
|
|
69
|
+
}>;
|
|
70
|
+
criteria: z.ZodArray<z.ZodString>;
|
|
71
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
72
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
73
|
+
type: z.ZodEnum<{
|
|
74
|
+
"llm-rubric": "llm-rubric";
|
|
75
|
+
contains: "contains";
|
|
76
|
+
"contains-any": "contains-any";
|
|
77
|
+
"contains-all": "contains-all";
|
|
78
|
+
"not-contains": "not-contains";
|
|
79
|
+
icontains: "icontains";
|
|
80
|
+
"icontains-any": "icontains-any";
|
|
81
|
+
regex: "regex";
|
|
82
|
+
javascript: "javascript";
|
|
83
|
+
similar: "similar";
|
|
84
|
+
cost: "cost";
|
|
85
|
+
latency: "latency";
|
|
86
|
+
}>;
|
|
87
|
+
value: z.ZodOptional<z.ZodUnknown>;
|
|
88
|
+
threshold: z.ZodOptional<z.ZodNumber>;
|
|
89
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
90
|
+
}, z.core.$loose>]>>;
|
|
91
|
+
baseline: z.ZodOptional<z.ZodObject<{
|
|
92
|
+
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
93
|
+
rubric: z.ZodOptional<z.ZodEnum<{
|
|
94
|
+
abbreviated: "abbreviated";
|
|
95
|
+
full: "full";
|
|
96
|
+
none: "none";
|
|
97
|
+
}>>;
|
|
98
|
+
}, z.core.$strip>>;
|
|
99
|
+
docCoverage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
100
|
+
referenceSolution: z.ZodOptional<z.ZodString>;
|
|
101
|
+
execution: z.ZodOptional<z.ZodObject<{
|
|
102
|
+
enabled: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
103
|
+
blocking: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
104
|
+
threshold: z.ZodOptional<z.ZodObject<{
|
|
105
|
+
score: z.ZodOptional<z.ZodNumber>;
|
|
106
|
+
}, z.core.$strip>>;
|
|
107
|
+
trigger: z.ZodOptional<z.ZodObject<{
|
|
108
|
+
branches: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
109
|
+
paths: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
110
|
+
}, z.core.$strip>>;
|
|
111
|
+
source: z.ZodOptional<z.ZodString>;
|
|
112
|
+
}, z.core.$strip>>;
|
|
113
|
+
}, z.core.$strip>;
|
|
114
|
+
export type RepoTask = z.infer<typeof RepoTaskSchema>;
|
|
115
|
+
/**
|
|
116
|
+
* Schema for an array of repo tasks — what a single .ailf/tasks/*.yaml file
|
|
117
|
+
* contains. Each file must define at least one task.
|
|
118
|
+
*/
|
|
119
|
+
export declare const RepoTaskFileSchema: z.ZodArray<z.ZodObject<{
|
|
120
|
+
id: z.ZodString;
|
|
121
|
+
description: z.ZodString;
|
|
122
|
+
status: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
123
|
+
active: "active";
|
|
124
|
+
draft: "draft";
|
|
125
|
+
paused: "paused";
|
|
126
|
+
archived: "archived";
|
|
127
|
+
}>>>;
|
|
128
|
+
featureArea: z.ZodString;
|
|
129
|
+
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
130
|
+
canonicalDocs: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
131
|
+
id: z.ZodString;
|
|
132
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
133
|
+
slug: z.ZodOptional<z.ZodString>;
|
|
134
|
+
path: z.ZodOptional<z.ZodString>;
|
|
135
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
136
|
+
slug: z.ZodString;
|
|
137
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
138
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
139
|
+
path: z.ZodString;
|
|
140
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
141
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
142
|
+
perspective: z.ZodString;
|
|
143
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
144
|
+
}, z.core.$strip>]>>>>;
|
|
145
|
+
vars: z.ZodOptional<z.ZodObject<{
|
|
146
|
+
task: z.ZodString;
|
|
147
|
+
}, z.core.$loose>>;
|
|
148
|
+
assert: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
149
|
+
type: z.ZodLiteral<"llm-rubric">;
|
|
150
|
+
template: z.ZodEnum<{
|
|
151
|
+
"task-completion": "task-completion";
|
|
152
|
+
"code-correctness": "code-correctness";
|
|
153
|
+
"doc-coverage": "doc-coverage";
|
|
154
|
+
}>;
|
|
155
|
+
criteria: z.ZodArray<z.ZodString>;
|
|
156
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
157
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
158
|
+
type: z.ZodEnum<{
|
|
159
|
+
"llm-rubric": "llm-rubric";
|
|
160
|
+
contains: "contains";
|
|
161
|
+
"contains-any": "contains-any";
|
|
162
|
+
"contains-all": "contains-all";
|
|
163
|
+
"not-contains": "not-contains";
|
|
164
|
+
icontains: "icontains";
|
|
165
|
+
"icontains-any": "icontains-any";
|
|
166
|
+
regex: "regex";
|
|
167
|
+
javascript: "javascript";
|
|
168
|
+
similar: "similar";
|
|
169
|
+
cost: "cost";
|
|
170
|
+
latency: "latency";
|
|
171
|
+
}>;
|
|
172
|
+
value: z.ZodOptional<z.ZodUnknown>;
|
|
173
|
+
threshold: z.ZodOptional<z.ZodNumber>;
|
|
174
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
175
|
+
}, z.core.$loose>]>>;
|
|
176
|
+
baseline: z.ZodOptional<z.ZodObject<{
|
|
177
|
+
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
178
|
+
rubric: z.ZodOptional<z.ZodEnum<{
|
|
179
|
+
abbreviated: "abbreviated";
|
|
180
|
+
full: "full";
|
|
181
|
+
none: "none";
|
|
182
|
+
}>>;
|
|
183
|
+
}, z.core.$strip>>;
|
|
184
|
+
docCoverage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
185
|
+
referenceSolution: z.ZodOptional<z.ZodString>;
|
|
186
|
+
execution: z.ZodOptional<z.ZodObject<{
|
|
187
|
+
enabled: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
188
|
+
blocking: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
189
|
+
threshold: z.ZodOptional<z.ZodObject<{
|
|
190
|
+
score: z.ZodOptional<z.ZodNumber>;
|
|
191
|
+
}, z.core.$strip>>;
|
|
192
|
+
trigger: z.ZodOptional<z.ZodObject<{
|
|
193
|
+
branches: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
194
|
+
paths: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
195
|
+
}, z.core.$strip>>;
|
|
196
|
+
source: z.ZodOptional<z.ZodString>;
|
|
197
|
+
}, z.core.$strip>>;
|
|
198
|
+
}, z.core.$strip>>;
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* schemas.ts — Zod schemas for repo-based task definitions.
|
|
3
|
+
*
|
|
4
|
+
* Validates .ailf/tasks/*.yaml task files from external repositories.
|
|
5
|
+
* These schemas are the contract between external repos and the AILF eval
|
|
6
|
+
* pipeline — they define exactly what fields are accepted, with friendly
|
|
7
|
+
* error messages for authors writing task YAML by hand.
|
|
8
|
+
*
|
|
9
|
+
* This module is the single source of truth for task schemas. The eval
|
|
10
|
+
* package re-exports from here to avoid duplication.
|
|
11
|
+
*
|
|
12
|
+
* @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
13
|
+
*/
|
|
14
|
+
import { z } from "zod";
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Constants — curated assertion types and rubric template names
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
/**
 * The set of assertion types allowed in repo-based task files.
 *
 * This is a curated subset of Promptfoo assertion types — we expose only the
 * types that are stable, well-documented, and useful for external authors.
 *
 * Frozen so the runtime value matches its `readonly` tuple typing: the list
 * is shared by the schemas built from it and must not be mutated by
 * importers.
 */
export const CURATED_ASSERTION_TYPES = Object.freeze([
    "llm-rubric",
    "contains",
    "contains-any",
    "contains-all",
    "not-contains",
    "icontains",
    "icontains-any",
    "regex",
    "javascript",
    "similar",
    "cost",
    "latency",
]);
|
|
38
|
+
/**
 * Valid rubric template names — must match keys in config/rubrics.yaml.
 *
 * Frozen so the runtime value matches its `readonly` tuple typing and cannot
 * be mutated by importers.
 */
export const RUBRIC_TEMPLATE_NAMES = Object.freeze([
    "task-completion",
    "code-correctness",
    "doc-coverage",
]);
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
// Canonical doc reference and assertion schemas
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
/**
 * Polymorphic canonical doc reference — the variant is chosen by which
 * resolution key (`id`, `slug`, `path`, or `perspective`) is present.
 *
 * Each variant requires its own key as a non-empty string and defaults
 * `reason` to "". These are plain `z.object` schemas, so keys a variant does
 * not declare are dropped from the parsed output rather than rejected.
 * NOTE(review): nothing in this schema rejects an entry that carries several
 * resolution keys; if "exactly one" is required it must be enforced elsewhere.
 *
 * @see docs/design-docs/canonical-doc-resolution.md
 */
// Builds a fresh optional `reason` field (defaults to the empty string).
const optionalDocRefReason = () => z.string().optional().default("");
const IdDocRefSchema = z.object({
    id: z.string().min(1),
    reason: optionalDocRefReason(),
    /** Human-readable slug annotation (not used for resolution) */
    slug: z.string().optional(),
    /** Human-readable path annotation (not used for resolution) */
    path: z.string().optional(),
});
const SlugDocRefSchema = z.object({
    slug: z.string().min(1),
    reason: optionalDocRefReason(),
});
const PathDocRefSchema = z.object({
    path: z.string().min(1),
    reason: optionalDocRefReason(),
});
const PerspectiveDocRefSchema = z.object({
    perspective: z.string().min(1),
    reason: optionalDocRefReason(),
});
// Member order is significant. Zod tries union members in order, and an
// id-based entry may also carry `slug`/`path` annotations, so IdDocRefSchema
// has to come first: `{ id: "...", slug: "..." }` must be parsed as an id
// reference, not as a slug reference.
const CanonicalDocRefSchema = z.union([
    IdDocRefSchema,
    SlugDocRefSchema,
    PathDocRefSchema,
    PerspectiveDocRefSchema,
]);
|
|
85
|
+
/**
 * An `llm-rubric` assertion that names one of the predefined rubric
 * templates and supplies the author's own criteria.
 *
 * `criteria` must contain at least one entry and every entry must be a
 * non-empty string; `weight` is optional.
 */
const rubricCriteriaSchema = z.array(z.string().min(1)).min(1);
const TemplatedAssertionSchema = z.object({
    type: z.literal("llm-rubric"),
    template: z.enum(RUBRIC_TEMPLATE_NAMES),
    criteria: rubricCriteriaSchema,
    weight: z.number().optional(),
});
|
|
95
|
+
/**
 * A value-based assertion (contains, regex, cost, etc.).
 *
 * Declared as a loose object so that fields not listed here are kept on the
 * parsed output instead of being stripped, allowing future extension without
 * schema breakage. `z.looseObject()` is the Zod 4 replacement for the
 * deprecated `.passthrough()` and yields the same schema.
 */
const ValueAssertionSchema = z.looseObject({
    type: z.enum(CURATED_ASSERTION_TYPES),
    value: z.unknown().optional(),
    threshold: z.number().optional(),
    weight: z.number().optional(),
});
|
|
107
|
+
/**
 * Union of all supported assertion shapes. The templated rubric form is
 * tried first, then the generic value-based form.
 */
const AssertionSchema = TemplatedAssertionSchema.or(ValueAssertionSchema);
|
|
112
|
+
// ---------------------------------------------------------------------------
|
|
113
|
+
// Nested config schemas
|
|
114
|
+
// ---------------------------------------------------------------------------
|
|
115
|
+
/**
 * Optional per-task baseline settings: an `enabled` flag and the rubric
 * variant to use ("abbreviated", "full", or "none"). Both fields are
 * optional, and the whole object may be omitted.
 */
const baselineRubricSchema = z.enum(["abbreviated", "full", "none"]);
const BaselineConfigSchema = z.optional(z.object({
    enabled: z.boolean().optional(),
    rubric: baselineRubricSchema.optional(),
}));
|
|
121
|
+
/** Optional score threshold; when given, `score` must lie within 0–100. */
const executionThresholdSchema = z
    .object({
        score: z.number().min(0).max(100).optional(),
    })
    .optional();
/** Optional trigger filter: lists of branch and path strings. */
const executionTriggerSchema = z
    .object({
        branches: z.array(z.string()).optional(),
        paths: z.array(z.string()).optional(),
    })
    .optional();
/**
 * Optional per-task execution settings. When the object is present,
 * `enabled` defaults to true and `blocking` defaults to false; `threshold`,
 * `trigger`, and `source` are optional.
 */
const ExecutionConfigSchema = z
    .object({
        enabled: z.boolean().optional().default(true),
        blocking: z.boolean().optional().default(false),
        threshold: executionThresholdSchema,
        trigger: executionTriggerSchema,
        source: z.string().optional(),
    })
    .optional();
|
|
139
|
+
// ---------------------------------------------------------------------------
|
|
140
|
+
// RepoTaskSchema — a single task definition from .ailf/tasks/*.yaml
|
|
141
|
+
// ---------------------------------------------------------------------------
|
|
142
|
+
/**
 * Shape shared by task IDs and feature areas: starts with a lowercase letter
 * or digit, followed by lowercase letters, digits, or hyphens.
 */
const TASK_SLUG_PATTERN = /^[a-z0-9][a-z0-9-]*$/;
/**
 * Zod schema for a single repo-based task definition.
 *
 * This is the external-author-facing contract. Field names are camelCase
 * to match the Content Lake document schema (ailf.task).
 *
 * Required: `id`, `description`, `featureArea`, and at least one `assert`
 * entry. Defaults: `status` → "active", `canonicalDocs` → [],
 * `docCoverage` → false. `vars`, when present, must contain a non-empty
 * `task` string and keeps any additional keys (loose object).
 */
export const RepoTaskSchema = z.object({
    id: z
        .string()
        .min(1)
        .regex(TASK_SLUG_PATTERN, "Task ID must be lowercase alphanumeric with hyphens"),
    description: z.string().min(1),
    status: z
        .enum(["active", "draft", "paused", "archived"])
        .optional()
        .default("active"),
    featureArea: z
        .string()
        .min(1)
        .regex(TASK_SLUG_PATTERN, "Feature area must be lowercase alphanumeric with hyphens"),
    tags: z.array(z.string()).optional(),
    canonicalDocs: z.array(CanonicalDocRefSchema).optional().default([]),
    // z.looseObject() replaces the deprecated .passthrough(): extra template
    // variables beyond `task` are preserved on the parsed output.
    vars: z
        .looseObject({
            task: z.string().min(1),
        })
        .optional(),
    assert: z.array(AssertionSchema).min(1),
    baseline: BaselineConfigSchema,
    docCoverage: z.boolean().optional().default(false),
    referenceSolution: z.string().optional(),
    execution: ExecutionConfigSchema,
});
|
|
176
|
+
/**
 * Schema for the contents of one .ailf/tasks/*.yaml file: a non-empty array
 * of repo task definitions.
 */
export const RepoTaskFileSchema = RepoTaskSchema.array().min(1);
|