@sanity/ailf 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +29 -12
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
- package/dist/_vendor/ailf-core/config-helpers.js +51 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
- package/dist/_vendor/ailf-core/examples/index.js +213 -94
- package/dist/_vendor/ailf-core/index.d.ts +3 -2
- package/dist/_vendor/ailf-core/index.js +2 -1
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +7 -1
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
- package/dist/adapters/task-sources/index.d.ts +3 -4
- package/dist/adapters/task-sources/index.js +3 -4
- package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
- package/dist/adapters/task-sources/repo-schemas.js +228 -20
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
- package/dist/adapters/task-sources/task-file-loader.js +21 -7
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +5 -4
- package/dist/commands/init.js +190 -25
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +15 -4
- package/dist/composition-root.js +100 -55
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +32 -19
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +77 -26
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +51 -31
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
- package/dist/pipeline/compiler/literacy-bridge.js +2 -2
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
- package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
- package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +16 -9
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +16 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
- package/dist/pipeline/mirror-repo-tasks.js +10 -10
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +68 -30
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +32 -24
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -2,15 +2,13 @@
|
|
|
2
2
|
* Adapter: Load task definitions from the Sanity Content Lake.
|
|
3
3
|
*
|
|
4
4
|
* Fetches ailf.task documents via GROQ and maps them to
|
|
5
|
-
* GeneralizedTaskDefinition (LiteracyTaskDefinition variant).
|
|
6
|
-
*
|
|
7
|
-
* produce identical GeneralizedTaskDefinition[] for the same logical
|
|
8
|
-
* tasks. The pipeline never knows which adapter loaded the tasks.
|
|
5
|
+
* GeneralizedTaskDefinition (LiteracyTaskDefinition variant).
|
|
6
|
+
* The pipeline never knows which adapter loaded the tasks.
|
|
9
7
|
*
|
|
10
|
-
* Wired in the composition root
|
|
8
|
+
* Wired in the composition root as the default task source.
|
|
11
9
|
*
|
|
12
10
|
* @see packages/core/src/ports/task-source.ts — TaskSource port
|
|
13
|
-
* @see docs/exec-plans/tasks-as-content/phase-2-pipeline-integration.md
|
|
11
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-2-pipeline-integration.md
|
|
14
12
|
*/
|
|
15
13
|
import type { SanityClient } from "@sanity/client";
|
|
16
14
|
import type { FilterOptions, GeneralizedTaskDefinition, TaskSource } from "../../_vendor/ailf-core/index.d.ts";
|
|
@@ -2,15 +2,13 @@
|
|
|
2
2
|
* Adapter: Load task definitions from the Sanity Content Lake.
|
|
3
3
|
*
|
|
4
4
|
* Fetches ailf.task documents via GROQ and maps them to
|
|
5
|
-
* GeneralizedTaskDefinition (LiteracyTaskDefinition variant).
|
|
6
|
-
*
|
|
7
|
-
* produce identical GeneralizedTaskDefinition[] for the same logical
|
|
8
|
-
* tasks. The pipeline never knows which adapter loaded the tasks.
|
|
5
|
+
* GeneralizedTaskDefinition (LiteracyTaskDefinition variant).
|
|
6
|
+
* The pipeline never knows which adapter loaded the tasks.
|
|
9
7
|
*
|
|
10
|
-
* Wired in the composition root
|
|
8
|
+
* Wired in the composition root as the default task source.
|
|
11
9
|
*
|
|
12
10
|
* @see packages/core/src/ports/task-source.ts — TaskSource port
|
|
13
|
-
* @see docs/exec-plans/tasks-as-content/phase-2-pipeline-integration.md
|
|
11
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-2-pipeline-integration.md
|
|
14
12
|
*/
|
|
15
13
|
// ---------------------------------------------------------------------------
|
|
16
14
|
// GROQ query — fetches ailf.task documents with resolved references
|
|
@@ -20,8 +18,8 @@
|
|
|
20
18
|
* mapping to LiteracyTaskDefinition.
|
|
21
19
|
*
|
|
22
20
|
* Key projections:
|
|
23
|
-
* -
|
|
24
|
-
* -
|
|
21
|
+
* - area reference → dereferenced areaId string
|
|
22
|
+
* - contextDocs[] → dereferenced article slugs with reason
|
|
25
23
|
* - referenceSolution → title (for identification, not full content)
|
|
26
24
|
*
|
|
27
25
|
* Filter parameters:
|
|
@@ -30,7 +28,7 @@
|
|
|
30
28
|
*/
|
|
31
29
|
const TASKS_QUERY = /* groq */ `
|
|
32
30
|
*[_type == "ailf.task"
|
|
33
|
-
&& (!defined($areas) ||
|
|
31
|
+
&& (!defined($areas) || area->areaId.current in $areas)
|
|
34
32
|
&& (!defined($taskIds) || id.current in $taskIds)
|
|
35
33
|
&& (
|
|
36
34
|
// Status-based filtering (unified — replaces execution.enabled)
|
|
@@ -41,13 +39,13 @@ const TASKS_QUERY = /* groq */ `
|
|
|
41
39
|
|| (defined($taskIds) && status != "archived")
|
|
42
40
|
)
|
|
43
41
|
&& (!defined($tags) || count((tags)[@ in $tags]) > 0)
|
|
44
|
-
] | order(
|
|
42
|
+
] | order(area->areaId.current asc, id.current asc) {
|
|
45
43
|
"taskId": id.current,
|
|
46
|
-
|
|
47
|
-
"
|
|
48
|
-
|
|
44
|
+
title,
|
|
45
|
+
"areaId": area->areaId.current,
|
|
46
|
+
promptText,
|
|
49
47
|
docCoverage,
|
|
50
|
-
"
|
|
48
|
+
"contextDocs": contextDocs[] {
|
|
51
49
|
refType,
|
|
52
50
|
"slug": doc->slug.current,
|
|
53
51
|
"docRefId": doc->_id,
|
|
@@ -57,7 +55,7 @@ const TASKS_QUERY = /* groq */ `
|
|
|
57
55
|
perspective,
|
|
58
56
|
reason
|
|
59
57
|
},
|
|
60
|
-
|
|
58
|
+
assertions,
|
|
61
59
|
rawAssert,
|
|
62
60
|
baseline,
|
|
63
61
|
tags,
|
|
@@ -112,22 +110,19 @@ function buildGroqParams(filter) {
|
|
|
112
110
|
* Map a Content Lake ailf.task document directly to a LiteracyTaskDefinition.
|
|
113
111
|
*
|
|
114
112
|
* Returns null if the document is missing required fields (taskId,
|
|
115
|
-
*
|
|
113
|
+
* title, areaId, promptText). These are required by the
|
|
116
114
|
* Studio schema, but defensive coding handles edge cases (drafts,
|
|
117
115
|
* partially-created documents, etc.).
|
|
118
116
|
*/
|
|
119
117
|
function mapToLiteracyTask(raw) {
|
|
120
118
|
// Required fields — skip malformed documents
|
|
121
|
-
if (!raw.taskId ||
|
|
122
|
-
!raw.description ||
|
|
123
|
-
!raw.featureAreaId ||
|
|
124
|
-
!raw.taskPrompt) {
|
|
119
|
+
if (!raw.taskId || !raw.title || !raw.areaId || !raw.promptText) {
|
|
125
120
|
return null;
|
|
126
121
|
}
|
|
127
|
-
const docs = (raw.
|
|
122
|
+
const docs = (raw.contextDocs ?? [])
|
|
128
123
|
.map(mapCanonicalDocRef)
|
|
129
124
|
.filter((d) => d !== null);
|
|
130
|
-
const assertions = mapAssertions(raw.
|
|
125
|
+
const assertions = mapAssertions(raw.assertions ?? []);
|
|
131
126
|
// Append raw pass-through assertions (escape hatch for arbitrary Promptfoo
|
|
132
127
|
// assertion types that aren't in the curated list). These bypass template
|
|
133
128
|
// resolution and flow directly into the expanded Promptfoo test case as
|
|
@@ -158,9 +153,9 @@ function mapToLiteracyTask(raw) {
|
|
|
158
153
|
return {
|
|
159
154
|
mode: "literacy",
|
|
160
155
|
id: raw.taskId,
|
|
161
|
-
title: raw.
|
|
162
|
-
area: raw.
|
|
163
|
-
prompt: { text: raw.
|
|
156
|
+
title: raw.title,
|
|
157
|
+
area: raw.areaId,
|
|
158
|
+
prompt: { text: raw.promptText },
|
|
164
159
|
context: { docs },
|
|
165
160
|
assertions: allAssertions,
|
|
166
161
|
docCoverage: raw.docCoverage ?? false,
|
|
@@ -172,7 +167,7 @@ function mapToLiteracyTask(raw) {
|
|
|
172
167
|
};
|
|
173
168
|
}
|
|
174
169
|
/**
|
|
175
|
-
* Map a Content Lake
|
|
170
|
+
* Map a Content Lake context doc entry to the polymorphic CanonicalDocRef.
|
|
176
171
|
*
|
|
177
172
|
* Uses `refType` to determine which value field to read. Falls back to
|
|
178
173
|
* slug-based resolution for backward compatibility (documents created
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
export { CompositeTaskSource } from "./composite-task-source.js";
|
|
2
2
|
export { ContentLakeTaskSource } from "./content-lake-task-source.js";
|
|
3
|
-
export {
|
|
3
|
+
export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, type CanonicalTask, type CuratedAssertionType, type RepoConfig, type RubricTemplateName, } from "./repo-schemas.js";
|
|
4
4
|
export { RepoTaskSource } from "./repo-task-source.js";
|
|
5
5
|
export { detectTriggerContext, resolveTrigger, type ResolvedTrigger, type TriggerContext, } from "./repo-trigger.js";
|
|
6
|
-
export { formatValidationResult,
|
|
7
|
-
export { discoverTsTaskFiles, loadAllTsTaskFiles, loadTsTaskFile, } from "./task-file-loader.js";
|
|
8
|
-
export { YamlTaskSource } from "./yaml-task-source.js";
|
|
6
|
+
export { formatValidationResult, validateCanonicalTasks, type ValidationMessage, type ValidationResult, } from "./repo-validation.js";
|
|
7
|
+
export { discoverTsTaskFiles, loadAllTsTaskFiles, loadTsTaskFile, loadTsTaskFileSync, } from "./task-file-loader.js";
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
export { CompositeTaskSource } from "./composite-task-source.js";
|
|
2
2
|
export { ContentLakeTaskSource } from "./content-lake-task-source.js";
|
|
3
|
-
export {
|
|
3
|
+
export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
|
|
4
4
|
export { RepoTaskSource } from "./repo-task-source.js";
|
|
5
5
|
export { detectTriggerContext, resolveTrigger, } from "./repo-trigger.js";
|
|
6
|
-
export { formatValidationResult,
|
|
7
|
-
export { discoverTsTaskFiles, loadAllTsTaskFiles, loadTsTaskFile, } from "./task-file-loader.js";
|
|
8
|
-
export { YamlTaskSource } from "./yaml-task-source.js";
|
|
6
|
+
export { formatValidationResult, validateCanonicalTasks, } from "./repo-validation.js";
|
|
7
|
+
export { discoverTsTaskFiles, loadAllTsTaskFiles, loadTsTaskFile, loadTsTaskFileSync, } from "./task-file-loader.js";
|
|
@@ -1,29 +1,231 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* repo-schemas.ts —
|
|
2
|
+
* repo-schemas.ts — Canonical Zod schemas for task and config validation.
|
|
3
3
|
*
|
|
4
|
-
* Task schemas
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Task schemas validate .ailf/tasks/*.yaml and .task.ts files against the
|
|
5
|
+
* canonical GeneralizedTaskDefinition shape. Field names match the internal
|
|
6
|
+
* domain model: `area` (not featureArea), `assertions` (not assert),
|
|
7
|
+
* `context.docs` (not canonicalDocs), `prompt.text` (not vars.task).
|
|
7
8
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
* external tools that only validate task YAML.
|
|
9
|
+
* Previously this file re-exported from @sanity/ailf-tasks. That package
|
|
10
|
+
* has been eliminated — all schema logic now lives here.
|
|
11
11
|
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
12
|
+
* Config schemas (RepoConfigSchema, trigger config) are eval-pipeline-
|
|
13
|
+
* specific and remain here unchanged.
|
|
14
|
+
*
|
|
15
|
+
* @see packages/core/src/types/generalized-task.ts — canonical TypeScript types
|
|
16
|
+
* @see docs/archive/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
|
|
14
17
|
*/
|
|
15
|
-
import { RepoTaskFileSchema as _Schema } from "../../_vendor/ailf-tasks/index.d.ts";
|
|
16
18
|
import { z } from "zod";
|
|
17
|
-
export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, type CuratedAssertionType, type RepoTask, type RubricTemplateName, } from "../../_vendor/ailf-tasks/index.d.ts";
|
|
18
|
-
export { loadTaskDir, parseTaskFile } from "../../_vendor/ailf-tasks/index.d.ts";
|
|
19
19
|
/**
|
|
20
|
-
*
|
|
21
|
-
*
|
|
20
|
+
* The set of assertion types allowed in task files.
|
|
21
|
+
*
|
|
22
|
+
* This is a curated subset of Promptfoo assertion types — we expose only the
|
|
23
|
+
* types that are stable, well-documented, and useful for external authors.
|
|
24
|
+
*/
|
|
25
|
+
export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency"];
|
|
26
|
+
export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
|
|
27
|
+
/**
|
|
28
|
+
* Valid rubric template names — must match keys in config/rubrics.yaml.
|
|
29
|
+
*/
|
|
30
|
+
export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage"];
|
|
31
|
+
export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
|
|
32
|
+
/**
|
|
33
|
+
* Zod schema for a single task definition using canonical field names.
|
|
34
|
+
*
|
|
35
|
+
* Uses .passthrough() to allow mode-specific fields (serverConfig, sandbox,
|
|
36
|
+
* handler, etc.) without listing every possible field. Mode-specific
|
|
37
|
+
* validation is deferred to the pipeline's mode handlers.
|
|
38
|
+
*/
|
|
39
|
+
export declare const CanonicalTaskSchema: z.ZodObject<{
|
|
40
|
+
id: z.ZodString;
|
|
41
|
+
mode: z.ZodDefault<z.ZodString>;
|
|
42
|
+
title: z.ZodString;
|
|
43
|
+
description: z.ZodOptional<z.ZodString>;
|
|
44
|
+
area: z.ZodOptional<z.ZodString>;
|
|
45
|
+
difficulty: z.ZodOptional<z.ZodEnum<{
|
|
46
|
+
basic: "basic";
|
|
47
|
+
intermediate: "intermediate";
|
|
48
|
+
advanced: "advanced";
|
|
49
|
+
}>>;
|
|
50
|
+
status: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
51
|
+
active: "active";
|
|
52
|
+
draft: "draft";
|
|
53
|
+
paused: "paused";
|
|
54
|
+
archived: "archived";
|
|
55
|
+
}>>>;
|
|
56
|
+
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
57
|
+
prompt: z.ZodOptional<z.ZodObject<{
|
|
58
|
+
template: z.ZodOptional<z.ZodString>;
|
|
59
|
+
text: z.ZodOptional<z.ZodString>;
|
|
60
|
+
systemMessage: z.ZodOptional<z.ZodString>;
|
|
61
|
+
vars: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
62
|
+
}, z.core.$strip>>;
|
|
63
|
+
context: z.ZodOptional<z.ZodObject<{
|
|
64
|
+
docs: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
65
|
+
id: z.ZodString;
|
|
66
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
67
|
+
slug: z.ZodOptional<z.ZodString>;
|
|
68
|
+
path: z.ZodOptional<z.ZodString>;
|
|
69
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
70
|
+
slug: z.ZodString;
|
|
71
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
72
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
73
|
+
path: z.ZodString;
|
|
74
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
75
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
76
|
+
perspective: z.ZodString;
|
|
77
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
78
|
+
}, z.core.$strip>]>>>;
|
|
79
|
+
fixtures: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
80
|
+
}, z.core.$strip>>;
|
|
81
|
+
assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
82
|
+
type: z.ZodLiteral<"llm-rubric">;
|
|
83
|
+
template: z.ZodEnum<{
|
|
84
|
+
"task-completion": "task-completion";
|
|
85
|
+
"code-correctness": "code-correctness";
|
|
86
|
+
"doc-coverage": "doc-coverage";
|
|
87
|
+
}>;
|
|
88
|
+
criteria: z.ZodArray<z.ZodString>;
|
|
89
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
90
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
91
|
+
type: z.ZodEnum<{
|
|
92
|
+
"llm-rubric": "llm-rubric";
|
|
93
|
+
contains: "contains";
|
|
94
|
+
"contains-any": "contains-any";
|
|
95
|
+
"contains-all": "contains-all";
|
|
96
|
+
"not-contains": "not-contains";
|
|
97
|
+
icontains: "icontains";
|
|
98
|
+
"icontains-any": "icontains-any";
|
|
99
|
+
regex: "regex";
|
|
100
|
+
javascript: "javascript";
|
|
101
|
+
similar: "similar";
|
|
102
|
+
cost: "cost";
|
|
103
|
+
latency: "latency";
|
|
104
|
+
}>;
|
|
105
|
+
value: z.ZodOptional<z.ZodUnknown>;
|
|
106
|
+
threshold: z.ZodOptional<z.ZodNumber>;
|
|
107
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
108
|
+
}, z.core.$loose>]>>>;
|
|
109
|
+
referenceSolution: z.ZodOptional<z.ZodString>;
|
|
110
|
+
docCoverage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
111
|
+
baseline: z.ZodOptional<z.ZodObject<{
|
|
112
|
+
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
113
|
+
rubric: z.ZodOptional<z.ZodEnum<{
|
|
114
|
+
full: "full";
|
|
115
|
+
abbreviated: "abbreviated";
|
|
116
|
+
none: "none";
|
|
117
|
+
}>>;
|
|
118
|
+
}, z.core.$strip>>;
|
|
119
|
+
rubric: z.ZodOptional<z.ZodUnknown>;
|
|
120
|
+
providers: z.ZodOptional<z.ZodArray<z.ZodUnknown>>;
|
|
121
|
+
options: z.ZodOptional<z.ZodUnknown>;
|
|
122
|
+
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
123
|
+
}, z.core.$loose>;
|
|
124
|
+
export type CanonicalTask = z.infer<typeof CanonicalTaskSchema>;
|
|
125
|
+
/**
|
|
126
|
+
* Schema for an array of canonical tasks — what a single .ailf/tasks/*.yaml
|
|
127
|
+
* file contains. Each file must define at least one task.
|
|
128
|
+
*/
|
|
129
|
+
export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodObject<{
|
|
130
|
+
id: z.ZodString;
|
|
131
|
+
mode: z.ZodDefault<z.ZodString>;
|
|
132
|
+
title: z.ZodString;
|
|
133
|
+
description: z.ZodOptional<z.ZodString>;
|
|
134
|
+
area: z.ZodOptional<z.ZodString>;
|
|
135
|
+
difficulty: z.ZodOptional<z.ZodEnum<{
|
|
136
|
+
basic: "basic";
|
|
137
|
+
intermediate: "intermediate";
|
|
138
|
+
advanced: "advanced";
|
|
139
|
+
}>>;
|
|
140
|
+
status: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
141
|
+
active: "active";
|
|
142
|
+
draft: "draft";
|
|
143
|
+
paused: "paused";
|
|
144
|
+
archived: "archived";
|
|
145
|
+
}>>>;
|
|
146
|
+
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
147
|
+
prompt: z.ZodOptional<z.ZodObject<{
|
|
148
|
+
template: z.ZodOptional<z.ZodString>;
|
|
149
|
+
text: z.ZodOptional<z.ZodString>;
|
|
150
|
+
systemMessage: z.ZodOptional<z.ZodString>;
|
|
151
|
+
vars: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
152
|
+
}, z.core.$strip>>;
|
|
153
|
+
context: z.ZodOptional<z.ZodObject<{
|
|
154
|
+
docs: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
155
|
+
id: z.ZodString;
|
|
156
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
157
|
+
slug: z.ZodOptional<z.ZodString>;
|
|
158
|
+
path: z.ZodOptional<z.ZodString>;
|
|
159
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
160
|
+
slug: z.ZodString;
|
|
161
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
162
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
163
|
+
path: z.ZodString;
|
|
164
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
165
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
166
|
+
perspective: z.ZodString;
|
|
167
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
168
|
+
}, z.core.$strip>]>>>;
|
|
169
|
+
fixtures: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
170
|
+
}, z.core.$strip>>;
|
|
171
|
+
assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
172
|
+
type: z.ZodLiteral<"llm-rubric">;
|
|
173
|
+
template: z.ZodEnum<{
|
|
174
|
+
"task-completion": "task-completion";
|
|
175
|
+
"code-correctness": "code-correctness";
|
|
176
|
+
"doc-coverage": "doc-coverage";
|
|
177
|
+
}>;
|
|
178
|
+
criteria: z.ZodArray<z.ZodString>;
|
|
179
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
180
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
181
|
+
type: z.ZodEnum<{
|
|
182
|
+
"llm-rubric": "llm-rubric";
|
|
183
|
+
contains: "contains";
|
|
184
|
+
"contains-any": "contains-any";
|
|
185
|
+
"contains-all": "contains-all";
|
|
186
|
+
"not-contains": "not-contains";
|
|
187
|
+
icontains: "icontains";
|
|
188
|
+
"icontains-any": "icontains-any";
|
|
189
|
+
regex: "regex";
|
|
190
|
+
javascript: "javascript";
|
|
191
|
+
similar: "similar";
|
|
192
|
+
cost: "cost";
|
|
193
|
+
latency: "latency";
|
|
194
|
+
}>;
|
|
195
|
+
value: z.ZodOptional<z.ZodUnknown>;
|
|
196
|
+
threshold: z.ZodOptional<z.ZodNumber>;
|
|
197
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
198
|
+
}, z.core.$loose>]>>>;
|
|
199
|
+
referenceSolution: z.ZodOptional<z.ZodString>;
|
|
200
|
+
docCoverage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
201
|
+
baseline: z.ZodOptional<z.ZodObject<{
|
|
202
|
+
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
203
|
+
rubric: z.ZodOptional<z.ZodEnum<{
|
|
204
|
+
full: "full";
|
|
205
|
+
abbreviated: "abbreviated";
|
|
206
|
+
none: "none";
|
|
207
|
+
}>>;
|
|
208
|
+
}, z.core.$strip>>;
|
|
209
|
+
rubric: z.ZodOptional<z.ZodUnknown>;
|
|
210
|
+
providers: z.ZodOptional<z.ZodArray<z.ZodUnknown>>;
|
|
211
|
+
options: z.ZodOptional<z.ZodUnknown>;
|
|
212
|
+
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
213
|
+
}, z.core.$loose>>;
|
|
214
|
+
/**
|
|
215
|
+
* Parse and validate a task file's content against the canonical schema.
|
|
216
|
+
* Returns typed tasks or throws with a user-friendly Zod error message.
|
|
217
|
+
*
|
|
218
|
+
* Accepts pre-parsed YAML data (unknown), not a raw string.
|
|
219
|
+
*/
|
|
220
|
+
export declare function parseCanonicalTaskFile(raw: unknown, filename: string): CanonicalTask[];
|
|
221
|
+
/**
|
|
222
|
+
* Detect legacy field names in raw task data and return helpful messages.
|
|
22
223
|
*
|
|
23
|
-
*
|
|
24
|
-
*
|
|
224
|
+
* Runs BEFORE Zod parsing to catch the most common migration mistake —
|
|
225
|
+
* using old field names from @sanity/ailf-tasks instead of the canonical
|
|
226
|
+
* GeneralizedTaskDefinition shape.
|
|
25
227
|
*/
|
|
26
|
-
export declare function
|
|
228
|
+
export declare function detectLegacyFieldNames(raw: unknown, filename: string): string[];
|
|
27
229
|
/**
|
|
28
230
|
* Zod schema for .ailf/config.yaml — controls documentation source,
|
|
29
231
|
* report destination, and trigger behavior for evaluations from an
|