@sanity/ailf 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +29 -12
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
- package/dist/_vendor/ailf-core/config-helpers.js +51 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
- package/dist/_vendor/ailf-core/examples/index.js +213 -94
- package/dist/_vendor/ailf-core/index.d.ts +3 -2
- package/dist/_vendor/ailf-core/index.js +2 -1
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +7 -1
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
- package/dist/adapters/task-sources/index.d.ts +3 -4
- package/dist/adapters/task-sources/index.js +3 -4
- package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
- package/dist/adapters/task-sources/repo-schemas.js +228 -20
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
- package/dist/adapters/task-sources/task-file-loader.js +21 -7
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +5 -4
- package/dist/commands/init.js +190 -25
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +15 -4
- package/dist/composition-root.js +100 -55
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +32 -19
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +77 -26
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +51 -31
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
- package/dist/pipeline/compiler/literacy-bridge.js +2 -2
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
- package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
- package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +16 -9
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +16 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
- package/dist/pipeline/mirror-repo-tasks.js +10 -10
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +68 -30
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +32 -24
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -1,42 +1,250 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* repo-schemas.ts —
|
|
2
|
+
* repo-schemas.ts — Canonical Zod schemas for task and config validation.
|
|
3
3
|
*
|
|
4
|
-
* Task schemas
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Task schemas validate .ailf/tasks/*.yaml and .task.ts files against the
|
|
5
|
+
* canonical GeneralizedTaskDefinition shape. Field names match the internal
|
|
6
|
+
* domain model: `area` (not featureArea), `assertions` (not assert),
|
|
7
|
+
* `context.docs` (not canonicalDocs), `prompt.text` (not vars.task).
|
|
7
8
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
* external tools that only validate task YAML.
|
|
9
|
+
* Previously this file re-exported from @sanity/ailf-tasks. That package
|
|
10
|
+
* has been eliminated — all schema logic now lives here.
|
|
11
11
|
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
12
|
+
* Config schemas (RepoConfigSchema, trigger config) are eval-pipeline-
|
|
13
|
+
* specific and remain here unchanged.
|
|
14
|
+
*
|
|
15
|
+
* @see packages/core/src/types/generalized-task.ts — canonical TypeScript types
|
|
16
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
14
17
|
*/
|
|
15
|
-
import { RepoTaskFileSchema as _Schema } from "../../_vendor/ailf-tasks/index.js";
|
|
16
18
|
import { z } from "zod";
|
|
17
19
|
// ---------------------------------------------------------------------------
|
|
18
|
-
//
|
|
20
|
+
// Constants — curated assertion types and rubric template names
|
|
19
21
|
// ---------------------------------------------------------------------------
|
|
20
|
-
export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, } from "../../_vendor/ailf-tasks/index.js";
|
|
21
|
-
export { loadTaskDir, parseTaskFile } from "../../_vendor/ailf-tasks/index.js";
|
|
22
22
|
/**
|
|
23
|
-
*
|
|
24
|
-
* with a user-friendly Zod error message.
|
|
23
|
+
* The set of assertion types allowed in task files.
|
|
25
24
|
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
25
|
+
* This is a curated subset of Promptfoo assertion types — we expose only the
|
|
26
|
+
* types that are stable, well-documented, and useful for external authors.
|
|
27
|
+
*/
|
|
28
|
+
export const CURATED_ASSERTION_TYPES = [
|
|
29
|
+
"llm-rubric",
|
|
30
|
+
"contains",
|
|
31
|
+
"contains-any",
|
|
32
|
+
"contains-all",
|
|
33
|
+
"not-contains",
|
|
34
|
+
"icontains",
|
|
35
|
+
"icontains-any",
|
|
36
|
+
"regex",
|
|
37
|
+
"javascript",
|
|
38
|
+
"similar",
|
|
39
|
+
"cost",
|
|
40
|
+
"latency",
|
|
41
|
+
];
|
|
42
|
+
/**
|
|
43
|
+
* Valid rubric template names — must match keys in config/rubrics.yaml.
|
|
44
|
+
*/
|
|
45
|
+
export const RUBRIC_TEMPLATE_NAMES = [
|
|
46
|
+
"task-completion",
|
|
47
|
+
"code-correctness",
|
|
48
|
+
"doc-coverage",
|
|
49
|
+
];
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// Doc ref schemas — polymorphic canonical doc references
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
/**
|
|
54
|
+
* Polymorphic canonical doc reference — discriminated by key presence.
|
|
55
|
+
* Exactly one resolution key (slug, path, id, or perspective) must be present.
|
|
56
|
+
*
|
|
57
|
+
* @see docs/design-docs/canonical-doc-resolution.md
|
|
58
|
+
*/
|
|
59
|
+
const SlugDocRefSchema = z.object({
|
|
60
|
+
slug: z.string().min(1),
|
|
61
|
+
reason: z.string().optional().default(""),
|
|
62
|
+
});
|
|
63
|
+
const PathDocRefSchema = z.object({
|
|
64
|
+
path: z.string().min(1),
|
|
65
|
+
reason: z.string().optional().default(""),
|
|
66
|
+
});
|
|
67
|
+
const IdDocRefSchema = z.object({
|
|
68
|
+
id: z.string().min(1),
|
|
69
|
+
reason: z.string().optional().default(""),
|
|
70
|
+
/** Human-readable slug annotation (not used for resolution) */
|
|
71
|
+
slug: z.string().optional(),
|
|
72
|
+
/** Human-readable path annotation (not used for resolution) */
|
|
73
|
+
path: z.string().optional(),
|
|
74
|
+
});
|
|
75
|
+
const PerspectiveDocRefSchema = z.object({
|
|
76
|
+
perspective: z.string().min(1),
|
|
77
|
+
reason: z.string().optional().default(""),
|
|
78
|
+
});
|
|
79
|
+
// Order matters: IdDocRefSchema first because it may also carry `slug`
|
|
80
|
+
// and `path` as optional annotations. Zod tries schemas in order, so
|
|
81
|
+
// entries like `{ id: "...", slug: "..." }` must match IdDocRefSchema
|
|
82
|
+
// (not SlugDocRefSchema).
|
|
83
|
+
const CanonicalDocRefSchema = z.union([
|
|
84
|
+
IdDocRefSchema,
|
|
85
|
+
SlugDocRefSchema,
|
|
86
|
+
PathDocRefSchema,
|
|
87
|
+
PerspectiveDocRefSchema,
|
|
88
|
+
]);
|
|
89
|
+
// ---------------------------------------------------------------------------
|
|
90
|
+
// Assertion schemas
|
|
91
|
+
// ---------------------------------------------------------------------------
|
|
92
|
+
/**
|
|
93
|
+
* A templated LLM-rubric assertion — uses one of the predefined rubric
|
|
94
|
+
* templates with author-supplied criteria.
|
|
28
95
|
*/
|
|
29
|
-
|
|
30
|
-
|
|
96
|
+
const TemplatedAssertionSchema = z.object({
|
|
97
|
+
type: z.literal("llm-rubric"),
|
|
98
|
+
template: z.enum(RUBRIC_TEMPLATE_NAMES),
|
|
99
|
+
criteria: z.array(z.string().min(1)).min(1),
|
|
100
|
+
weight: z.number().optional(),
|
|
101
|
+
});
|
|
102
|
+
/**
|
|
103
|
+
* A value-based assertion (contains, regex, cost, etc.). Uses .passthrough()
|
|
104
|
+
* to allow extra fields for future extension without schema breakage.
|
|
105
|
+
*/
|
|
106
|
+
const ValueAssertionSchema = z
|
|
107
|
+
.object({
|
|
108
|
+
type: z.enum(CURATED_ASSERTION_TYPES),
|
|
109
|
+
value: z.unknown().optional(),
|
|
110
|
+
threshold: z.number().optional(),
|
|
111
|
+
weight: z.number().optional(),
|
|
112
|
+
})
|
|
113
|
+
.passthrough();
|
|
114
|
+
/** Union of all supported assertion shapes. */
|
|
115
|
+
const AssertionSchema = z.union([
|
|
116
|
+
TemplatedAssertionSchema,
|
|
117
|
+
ValueAssertionSchema,
|
|
118
|
+
]);
|
|
119
|
+
// ---------------------------------------------------------------------------
|
|
120
|
+
// Nested config schemas
|
|
121
|
+
// ---------------------------------------------------------------------------
|
|
122
|
+
const BaselineConfigSchema = z
|
|
123
|
+
.object({
|
|
124
|
+
enabled: z.boolean().optional(),
|
|
125
|
+
rubric: z.enum(["abbreviated", "full", "none"]).optional(),
|
|
126
|
+
})
|
|
127
|
+
.optional();
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
129
|
+
// CanonicalTaskSchema — the single canonical task shape
|
|
130
|
+
//
|
|
131
|
+
// Validates the GeneralizedTaskDefinition shape. Field names match the
|
|
132
|
+
// internal domain model directly — no mapping layer needed.
|
|
133
|
+
//
|
|
134
|
+
// YAML tasks may omit `mode` (defaults to "literacy"). All other fields
|
|
135
|
+
// use the canonical names: `title`, `area`, `prompt.text`, `context.docs`,
|
|
136
|
+
// `assertions`.
|
|
137
|
+
// ---------------------------------------------------------------------------
|
|
138
|
+
/**
|
|
139
|
+
* Zod schema for a single task definition using canonical field names.
|
|
140
|
+
*
|
|
141
|
+
* Uses .passthrough() to allow mode-specific fields (serverConfig, sandbox,
|
|
142
|
+
* handler, etc.) without listing every possible field. Mode-specific
|
|
143
|
+
* validation is deferred to the pipeline's mode handlers.
|
|
144
|
+
*/
|
|
145
|
+
export const CanonicalTaskSchema = z
|
|
146
|
+
.object({
|
|
147
|
+
id: z
|
|
148
|
+
.string()
|
|
149
|
+
.min(1)
|
|
150
|
+
.regex(/^[a-z0-9][a-z0-9-]*$/, "Task ID must be lowercase alphanumeric with hyphens"),
|
|
151
|
+
mode: z.string().default("literacy"),
|
|
152
|
+
title: z.string().min(1),
|
|
153
|
+
description: z.string().optional(),
|
|
154
|
+
area: z.string().optional(),
|
|
155
|
+
difficulty: z.enum(["basic", "intermediate", "advanced"]).optional(),
|
|
156
|
+
status: z
|
|
157
|
+
.enum(["active", "draft", "paused", "archived"])
|
|
158
|
+
.optional()
|
|
159
|
+
.default("active"),
|
|
160
|
+
tags: z.array(z.string()).optional(),
|
|
161
|
+
prompt: z
|
|
162
|
+
.object({
|
|
163
|
+
template: z.string().optional(),
|
|
164
|
+
text: z.string().optional(),
|
|
165
|
+
systemMessage: z.string().optional(),
|
|
166
|
+
vars: z.record(z.string(), z.unknown()).optional(),
|
|
167
|
+
})
|
|
168
|
+
.optional(),
|
|
169
|
+
context: z
|
|
170
|
+
.object({
|
|
171
|
+
docs: z.array(CanonicalDocRefSchema).optional(),
|
|
172
|
+
fixtures: z.array(z.string()).optional(),
|
|
173
|
+
})
|
|
174
|
+
.optional(),
|
|
175
|
+
assertions: z.array(AssertionSchema).optional(),
|
|
176
|
+
referenceSolution: z.string().optional(),
|
|
177
|
+
docCoverage: z.boolean().optional().default(false),
|
|
178
|
+
baseline: BaselineConfigSchema,
|
|
179
|
+
rubric: z.unknown().optional(),
|
|
180
|
+
providers: z.array(z.unknown()).optional(),
|
|
181
|
+
options: z.unknown().optional(),
|
|
182
|
+
metadata: z.record(z.string(), z.unknown()).optional(),
|
|
183
|
+
})
|
|
184
|
+
.passthrough();
|
|
185
|
+
/**
|
|
186
|
+
* Schema for an array of canonical tasks — what a single .ailf/tasks/*.yaml
|
|
187
|
+
* file contains. Each file must define at least one task.
|
|
188
|
+
*/
|
|
189
|
+
export const CanonicalTaskFileSchema = z.array(CanonicalTaskSchema).min(1);
|
|
190
|
+
/**
|
|
191
|
+
* Parse and validate a task file's content against the canonical schema.
|
|
192
|
+
* Returns typed tasks or throws with a user-friendly Zod error message.
|
|
193
|
+
*
|
|
194
|
+
* Accepts pre-parsed YAML data (unknown), not a raw string.
|
|
195
|
+
*/
|
|
196
|
+
export function parseCanonicalTaskFile(raw, filename) {
|
|
197
|
+
const result = CanonicalTaskFileSchema.safeParse(raw);
|
|
31
198
|
if (!result.success) {
|
|
32
199
|
const messages = result.error.issues
|
|
33
200
|
.map((i) => ` [${i.path.join(".")}]: ${i.message}`)
|
|
34
201
|
.join("\n");
|
|
35
|
-
throw new Error(`Invalid
|
|
202
|
+
throw new Error(`Invalid task file "${filename}":\n${messages}`);
|
|
36
203
|
}
|
|
37
204
|
return result.data;
|
|
38
205
|
}
|
|
39
206
|
// ---------------------------------------------------------------------------
|
|
207
|
+
// Legacy field name detection
|
|
208
|
+
//
|
|
209
|
+
// When authors accidentally use the old @sanity/ailf-tasks field names
|
|
210
|
+
// (featureArea, canonicalDocs, assert, vars), surface a helpful error
|
|
211
|
+
// message telling them what the canonical names are.
|
|
212
|
+
// ---------------------------------------------------------------------------
|
|
213
|
+
/** Old field names from @sanity/ailf-tasks → canonical equivalents */
|
|
214
|
+
const LEGACY_FIELD_MAP = {
|
|
215
|
+
featureArea: "area",
|
|
216
|
+
canonicalDocs: "context.docs (nested under context: { docs: [...] })",
|
|
217
|
+
assert: "assertions",
|
|
218
|
+
vars: "prompt (nested under prompt: { text: ... })",
|
|
219
|
+
};
|
|
220
|
+
/**
|
|
221
|
+
* Detect legacy field names in raw task data and return helpful messages.
|
|
222
|
+
*
|
|
223
|
+
* Runs BEFORE Zod parsing to catch the most common migration mistake —
|
|
224
|
+
* using old field names from @sanity/ailf-tasks instead of the canonical
|
|
225
|
+
* GeneralizedTaskDefinition shape.
|
|
226
|
+
*/
|
|
227
|
+
export function detectLegacyFieldNames(raw, filename) {
|
|
228
|
+
const warnings = [];
|
|
229
|
+
if (!Array.isArray(raw))
|
|
230
|
+
return warnings;
|
|
231
|
+
for (let i = 0; i < raw.length; i++) {
|
|
232
|
+
const entry = raw[i];
|
|
233
|
+
if (typeof entry !== "object" || entry === null)
|
|
234
|
+
continue;
|
|
235
|
+
const obj = entry;
|
|
236
|
+
const taskId = typeof obj.id === "string" ? obj.id : `task[${i}]`;
|
|
237
|
+
for (const [legacy, canonical] of Object.entries(LEGACY_FIELD_MAP)) {
|
|
238
|
+
if (legacy in obj) {
|
|
239
|
+
warnings.push(`[${filename}] ${taskId}: Found legacy field "${legacy}" — ` +
|
|
240
|
+
`use "${canonical}" instead. ` +
|
|
241
|
+
"See contributing-tasks.md for the canonical task format.");
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
return warnings;
|
|
246
|
+
}
|
|
247
|
+
// ---------------------------------------------------------------------------
|
|
40
248
|
// Config schemas — specific to the eval pipeline
|
|
41
249
|
// ---------------------------------------------------------------------------
|
|
42
250
|
const TriggerModeSchema = z.enum(["validate-only", "eval"]);
|
|
@@ -1,18 +1,22 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Adapter: Load task definitions from .ailf/tasks
|
|
2
|
+
* Adapter: Load task definitions from .ailf/tasks/ in an external repo.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* document schema) with slug strings instead of Sanity references. The
|
|
6
|
-
* mapping to LiteracyTaskDefinition is straightforward — field names are
|
|
7
|
-
* already aligned with the domain type.
|
|
4
|
+
* Supports two task file formats:
|
|
8
5
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
* - Has an explicit featureArea field per task (not derived from filename)
|
|
6
|
+
* 1. **TypeScript (.task.ts)** — Tasks authored with `defineTask()` from
|
|
7
|
+
* `@sanity/ailf`. These use `GeneralizedTaskDefinition` field names
|
|
8
|
+
* and are passed through with basic runtime guards.
|
|
13
9
|
*
|
|
10
|
+
* 2. **YAML (.yaml)** — Tasks using the canonical `GeneralizedTaskDefinition`
|
|
11
|
+
* field names (area, context.docs, assertions, prompt.text). Validated
|
|
12
|
+
* through the CanonicalTaskSchema Zod schema.
|
|
13
|
+
*
|
|
14
|
+
* All tasks use the single canonical shape — no mapping layer, no dual-shape
|
|
15
|
+
* detection. The `mode` field defaults to "literacy" for YAML tasks that
|
|
16
|
+
* omit it.
|
|
17
|
+
*
|
|
18
|
+
* @see packages/core/src/types/generalized-task.ts — canonical types
|
|
14
19
|
* @see packages/core/src/ports/task-source.ts — TaskSource port
|
|
15
|
-
* @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
16
20
|
*/
|
|
17
21
|
import type { FilterOptions, GeneralizedTaskDefinition, TaskSource } from "../../_vendor/ailf-core/index.d.ts";
|
|
18
22
|
export declare class RepoTaskSource implements TaskSource {
|
|
@@ -1,24 +1,31 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Adapter: Load task definitions from .ailf/tasks
|
|
2
|
+
* Adapter: Load task definitions from .ailf/tasks/ in an external repo.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
* document schema) with slug strings instead of Sanity references. The
|
|
6
|
-
* mapping to LiteracyTaskDefinition is straightforward — field names are
|
|
7
|
-
* already aligned with the domain type.
|
|
4
|
+
* Supports two task file formats:
|
|
8
5
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
* - Has an explicit featureArea field per task (not derived from filename)
|
|
6
|
+
* 1. **TypeScript (.task.ts)** — Tasks authored with `defineTask()` from
|
|
7
|
+
* `@sanity/ailf`. These use `GeneralizedTaskDefinition` field names
|
|
8
|
+
* and are passed through with basic runtime guards.
|
|
13
9
|
*
|
|
10
|
+
* 2. **YAML (.yaml)** — Tasks using the canonical `GeneralizedTaskDefinition`
|
|
11
|
+
* field names (area, context.docs, assertions, prompt.text). Validated
|
|
12
|
+
* through the CanonicalTaskSchema Zod schema.
|
|
13
|
+
*
|
|
14
|
+
* All tasks use the single canonical shape — no mapping layer, no dual-shape
|
|
15
|
+
* detection. The `mode` field defaults to "literacy" for YAML tasks that
|
|
16
|
+
* omit it.
|
|
17
|
+
*
|
|
18
|
+
* @see packages/core/src/types/generalized-task.ts — canonical types
|
|
14
19
|
* @see packages/core/src/ports/task-source.ts — TaskSource port
|
|
15
|
-
* @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
16
20
|
*/
|
|
17
21
|
import { existsSync, readdirSync, readFileSync } from "fs";
|
|
18
22
|
import { resolve } from "path";
|
|
19
23
|
import { load } from "js-yaml";
|
|
20
|
-
import {
|
|
24
|
+
import { CANONICAL_EVAL_MODES } from "../../_vendor/ailf-shared/index.js";
|
|
25
|
+
import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "./repo-schemas.js";
|
|
21
26
|
import { discoverTsTaskFiles, loadTsTaskFile } from "./task-file-loader.js";
|
|
27
|
+
/** Set of canonical mode names for O(1) lookup */
|
|
28
|
+
const KNOWN_MODES = new Set(CANONICAL_EVAL_MODES);
|
|
22
29
|
// ---------------------------------------------------------------------------
|
|
23
30
|
// RepoTaskSource adapter
|
|
24
31
|
// ---------------------------------------------------------------------------
|
|
@@ -41,6 +48,7 @@ export class RepoTaskSource {
|
|
|
41
48
|
" Expected .ailf/tasks/*.yaml or .ailf/tasks/*.task.ts files");
|
|
42
49
|
}
|
|
43
50
|
const definitions = [];
|
|
51
|
+
// Load YAML task files
|
|
44
52
|
for (const file of yamlFiles) {
|
|
45
53
|
const filePath = resolve(this.tasksDir, file);
|
|
46
54
|
const raw = readFileSync(filePath, "utf-8");
|
|
@@ -49,139 +57,90 @@ export class RepoTaskSource {
|
|
|
49
57
|
throw new Error(`${file} did not parse to an array of tasks. ` +
|
|
50
58
|
"Repo task files must contain a YAML array of task definitions.");
|
|
51
59
|
}
|
|
52
|
-
//
|
|
60
|
+
// Detect legacy field names (featureArea, canonicalDocs, assert, vars)
|
|
61
|
+
// and surface helpful migration messages before Zod validation fails.
|
|
62
|
+
const legacyWarnings = detectLegacyFieldNames(parsed, file);
|
|
63
|
+
if (legacyWarnings.length > 0) {
|
|
64
|
+
throw new Error(`${file} uses legacy field names from @sanity/ailf-tasks.\n` +
|
|
65
|
+
"Task files must use canonical GeneralizedTaskDefinition field names.\n\n" +
|
|
66
|
+
legacyWarnings.join("\n") +
|
|
67
|
+
"\n\nSee contributing-tasks.md for the canonical task format.");
|
|
68
|
+
}
|
|
69
|
+
// Validate through canonical Zod schema
|
|
53
70
|
let validated;
|
|
54
71
|
try {
|
|
55
|
-
validated =
|
|
72
|
+
validated = parseCanonicalTaskFile(parsed, file);
|
|
56
73
|
}
|
|
57
74
|
catch (err) {
|
|
58
75
|
const msg = err instanceof Error ? err.message : String(err);
|
|
59
76
|
throw new Error(`Failed to validate ${file}:\n${msg}`, { cause: err });
|
|
60
77
|
}
|
|
61
|
-
for (const
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
// 2. Task ID filter — skip tasks not matching explicit task IDs
|
|
65
|
-
// 3. Status filter — skip non-active tasks (unless targeting by ID)
|
|
66
|
-
// 4. Tag filter — skip tasks not matching requested tags
|
|
67
|
-
// Area filter
|
|
68
|
-
if (filter?.areas &&
|
|
69
|
-
filter.areas.length > 0 &&
|
|
70
|
-
!filter.areas
|
|
71
|
-
.map((a) => a.toLowerCase())
|
|
72
|
-
.includes(entry.featureArea.toLowerCase())) {
|
|
73
|
-
continue;
|
|
74
|
-
}
|
|
75
|
-
// Task ID filter
|
|
76
|
-
if (filter?.taskIds &&
|
|
77
|
-
filter.taskIds.length > 0 &&
|
|
78
|
-
!filter.taskIds.includes(entry.id)) {
|
|
79
|
-
continue;
|
|
80
|
-
}
|
|
81
|
-
// Status filter — unified lifecycle control
|
|
82
|
-
// Resolve effective status: explicit status field wins,
|
|
83
|
-
// then fall back to execution.enabled for backwards compat
|
|
84
|
-
const effectiveStatus = entry.status ??
|
|
85
|
-
(entry.execution?.enabled === false ? "paused" : "active");
|
|
86
|
-
const isTargetedById = filter?.taskIds && filter.taskIds.includes(entry.id);
|
|
87
|
-
if (effectiveStatus === "archived") {
|
|
88
|
-
continue; // Archived is always excluded, even with --task
|
|
89
|
-
}
|
|
90
|
-
if (effectiveStatus === "paused" && !isTargetedById) {
|
|
91
|
-
continue; // Paused skipped unless explicitly targeted
|
|
78
|
+
for (const task of validated) {
|
|
79
|
+
if (passesFilter(task, filter)) {
|
|
80
|
+
definitions.push(task);
|
|
92
81
|
}
|
|
93
|
-
if (effectiveStatus === "draft" &&
|
|
94
|
-
!isTargetedById &&
|
|
95
|
-
!filter?.includeDrafts) {
|
|
96
|
-
continue; // Draft skipped unless targeted or includeDrafts
|
|
97
|
-
}
|
|
98
|
-
// Tag filter — skip tasks that don't match any requested tag
|
|
99
|
-
if (filter?.tags &&
|
|
100
|
-
filter.tags.length > 0 &&
|
|
101
|
-
(!entry.tags || !entry.tags.some((t) => filter.tags.includes(t)))) {
|
|
102
|
-
continue;
|
|
103
|
-
}
|
|
104
|
-
definitions.push(mapToLiteracyTask(entry));
|
|
105
82
|
}
|
|
106
83
|
}
|
|
107
84
|
// Load TS task files (.task.ts / .task.js)
|
|
108
85
|
for (const tsFile of tsFiles) {
|
|
109
86
|
const loaded = await loadTsTaskFile(tsFile);
|
|
110
87
|
const filename = tsFile.split("/").pop() ?? tsFile;
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
}
|
|
116
|
-
catch (err) {
|
|
117
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
118
|
-
throw new Error(`Failed to validate ${filename}:\n${msg}`, {
|
|
119
|
-
cause: err,
|
|
120
|
-
});
|
|
121
|
-
}
|
|
122
|
-
for (const entry of validated) {
|
|
123
|
-
// Apply the same filtering as YAML tasks
|
|
124
|
-
if (filter?.areas &&
|
|
125
|
-
filter.areas.length > 0 &&
|
|
126
|
-
!filter.areas
|
|
127
|
-
.map((a) => a.toLowerCase())
|
|
128
|
-
.includes(entry.featureArea.toLowerCase())) {
|
|
129
|
-
continue;
|
|
88
|
+
for (const raw of loaded.tasks) {
|
|
89
|
+
const task = raw;
|
|
90
|
+
if (!task.id || typeof task.id !== "string") {
|
|
91
|
+
throw new Error(`Task in ${filename} is missing a valid "id" field`);
|
|
130
92
|
}
|
|
131
|
-
if (
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
continue;
|
|
93
|
+
if (!task.mode || !KNOWN_MODES.has(task.mode)) {
|
|
94
|
+
throw new Error(`Task "${task.id}" in ${filename} has missing or unknown mode "${task.mode}". ` +
|
|
95
|
+
`Valid modes: ${[...KNOWN_MODES].join(", ")}`);
|
|
135
96
|
}
|
|
136
|
-
|
|
137
|
-
(
|
|
138
|
-
const isTargetedById = filter?.taskIds && filter.taskIds.includes(entry.id);
|
|
139
|
-
if (effectiveStatus === "archived")
|
|
140
|
-
continue;
|
|
141
|
-
if (effectiveStatus === "paused" && !isTargetedById)
|
|
142
|
-
continue;
|
|
143
|
-
if (effectiveStatus === "draft" &&
|
|
144
|
-
!isTargetedById &&
|
|
145
|
-
!filter?.includeDrafts) {
|
|
146
|
-
continue;
|
|
97
|
+
if (passesFilter(task, filter)) {
|
|
98
|
+
definitions.push(task);
|
|
147
99
|
}
|
|
148
|
-
if (filter?.tags &&
|
|
149
|
-
filter.tags.length > 0 &&
|
|
150
|
-
(!entry.tags || !entry.tags.some((t) => filter.tags.includes(t)))) {
|
|
151
|
-
continue;
|
|
152
|
-
}
|
|
153
|
-
definitions.push(mapToLiteracyTask(entry));
|
|
154
100
|
}
|
|
155
101
|
}
|
|
156
102
|
return definitions;
|
|
157
103
|
}
|
|
158
104
|
}
|
|
159
105
|
// ---------------------------------------------------------------------------
|
|
160
|
-
//
|
|
106
|
+
// Filter helper
|
|
161
107
|
// ---------------------------------------------------------------------------
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
//
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
108
|
+
/**
|
|
109
|
+
* Apply standard task filtering. Used for both YAML and TS tasks.
|
|
110
|
+
*/
|
|
111
|
+
function passesFilter(task, filter) {
|
|
112
|
+
// Area filter
|
|
113
|
+
if (filter?.areas &&
|
|
114
|
+
filter.areas.length > 0 &&
|
|
115
|
+
(!task.area ||
|
|
116
|
+
!filter.areas
|
|
117
|
+
.map((a) => a.toLowerCase())
|
|
118
|
+
.includes(task.area.toLowerCase()))) {
|
|
119
|
+
return false;
|
|
120
|
+
}
|
|
121
|
+
// Task ID filter
|
|
122
|
+
if (filter?.taskIds &&
|
|
123
|
+
filter.taskIds.length > 0 &&
|
|
124
|
+
!filter.taskIds.includes(task.id)) {
|
|
125
|
+
return false;
|
|
126
|
+
}
|
|
127
|
+
// Status filter — unified lifecycle control
|
|
128
|
+
const effectiveStatus = task.status ?? "active";
|
|
129
|
+
const isTargetedById = filter?.taskIds && filter.taskIds.includes(task.id);
|
|
130
|
+
if (effectiveStatus === "archived")
|
|
131
|
+
return false;
|
|
132
|
+
if (effectiveStatus === "paused" && !isTargetedById)
|
|
133
|
+
return false;
|
|
134
|
+
if (effectiveStatus === "draft" &&
|
|
135
|
+
!isTargetedById &&
|
|
136
|
+
!filter?.includeDrafts) {
|
|
137
|
+
return false;
|
|
138
|
+
}
|
|
139
|
+
// Tag filter
|
|
140
|
+
if (filter?.tags &&
|
|
141
|
+
filter.tags.length > 0 &&
|
|
142
|
+
(!task.tags || !task.tags.some((t) => filter.tags.includes(t)))) {
|
|
143
|
+
return false;
|
|
144
|
+
}
|
|
145
|
+
return true;
|
|
187
146
|
}
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* config applies. This drives whether the pipeline runs in validate-only
|
|
7
7
|
* mode or full eval mode, and whether results are blocking.
|
|
8
8
|
*
|
|
9
|
-
* @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
9
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
10
10
|
*/
|
|
11
11
|
export type TriggerContext = {
|
|
12
12
|
type: "pr";
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* config applies. This drives whether the pipeline runs in validate-only
|
|
7
7
|
* mode or full eval mode, and whether results are blocking.
|
|
8
8
|
*
|
|
9
|
-
* @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
9
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
10
10
|
*/
|
|
11
11
|
import { existsSync, readFileSync } from "fs";
|
|
12
12
|
import { resolve } from "path";
|
|
@@ -1,8 +1,39 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* repo-validation.ts —
|
|
2
|
+
* repo-validation.ts — Semantic validation for task definitions.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Checks that go beyond Zod schema parsing:
|
|
5
|
+
* - Assertion types are in the curated set
|
|
6
|
+
* - Rubric template names resolve to known templates
|
|
7
|
+
* - Doc ref slugs look reasonable (slugs, not URLs)
|
|
8
|
+
* - Tasks have at least one LLM rubric assertion (recommended)
|
|
9
|
+
* - Tasks have a prompt text (recommended)
|
|
10
|
+
*
|
|
11
|
+
* These produce warnings, not errors — the pipeline can still run
|
|
12
|
+
* with imperfect tasks. Only structural failures (caught by Zod) block.
|
|
13
|
+
*
|
|
14
|
+
* Previously this file re-exported from @sanity/ailf-tasks. That package
|
|
15
|
+
* has been eliminated — all validation logic now lives here.
|
|
16
|
+
*/
|
|
17
|
+
import { type CanonicalTask } from "./repo-schemas.js";
|
|
18
|
+
export interface ValidationResult {
|
|
19
|
+
valid: boolean;
|
|
20
|
+
errors: ValidationMessage[];
|
|
21
|
+
warnings: ValidationMessage[];
|
|
22
|
+
}
|
|
23
|
+
export interface ValidationMessage {
|
|
24
|
+
taskId: string;
|
|
25
|
+
field: string;
|
|
26
|
+
message: string;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Run semantic validation on an array of parsed canonical tasks.
|
|
30
|
+
*
|
|
31
|
+
* Returns warnings for issues that don't block execution (unknown feature
|
|
32
|
+
* areas, unresolved slugs) and errors for issues that would cause pipeline
|
|
33
|
+
* failures (completely missing required fields — though Zod catches most).
|
|
34
|
+
*/
|
|
35
|
+
export declare function validateCanonicalTasks(tasks: CanonicalTask[]): ValidationResult;
|
|
36
|
+
/**
|
|
37
|
+
* Format validation results for console output.
|
|
7
38
|
*/
|
|
8
|
-
export
|
|
39
|
+
export declare function formatValidationResult(result: ValidationResult): string;
|