@sanity/ailf 0.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/config/features.ts +23 -0
- package/config/models.ts +95 -0
- package/config/prompts.ts +16 -0
- package/config/rubrics.ts +225 -0
- package/config/schedules.ts +47 -0
- package/config/sinks.ts +37 -0
- package/config/sources.ts +21 -0
- package/config/thresholds.ts +61 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
- package/dist/_vendor/ailf-core/config-helpers.js +170 -0
- package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
- package/dist/_vendor/ailf-core/env-helper.js +45 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/examples/index.js +25 -0
- package/dist/_vendor/ailf-core/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/index.js +5 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
- package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
- package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
- package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
- package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/services/index.js +2 -1
- package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
- package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
- package/dist/_vendor/ailf-core/services/scoring.js +25 -15
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
- package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
- package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
- package/dist/_vendor/ailf-core/types/index.js +8 -1
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
- package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
- package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
- package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
- package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
- package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
- package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
- package/dist/_vendor/ailf-core/types/trace.js +18 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
- package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
- package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
- package/dist/_vendor/ailf-shared/index.d.ts +0 -1
- package/dist/_vendor/ailf-shared/index.js +0 -1
- package/dist/adapters/api-client/build-request.js +14 -13
- package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
- package/dist/adapters/config-sources/file-config-adapter.js +39 -12
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
- package/dist/adapters/config-sources/ts-config-loader.js +141 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
- package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
- package/dist/adapters/task-sources/index.d.ts +3 -2
- package/dist/adapters/task-sources/index.js +3 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
- package/dist/adapters/task-sources/repo-schemas.js +227 -19
- package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
- package/dist/adapters/task-sources/repo-task-source.js +92 -80
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
- package/dist/adapters/task-sources/task-file-loader.js +83 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
- package/dist/adapters/task-sources/yaml-task-source.js +19 -16
- package/dist/cli.js +0 -2
- package/dist/commands/baseline.js +4 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/coverage-audit.js +9 -1
- package/dist/commands/explain-handler.js +25 -23
- package/dist/commands/fetch-docs.js +3 -2
- package/dist/commands/generate-configs.js +1 -1
- package/dist/commands/init.d.ts +6 -4
- package/dist/commands/init.js +302 -23
- package/dist/commands/interactive.js +11 -7
- package/dist/commands/pipeline-action.d.ts +2 -0
- package/dist/commands/pipeline-action.js +16 -6
- package/dist/commands/pipeline.d.ts +1 -0
- package/dist/commands/pipeline.js +4 -2
- package/dist/commands/pr-comment.js +1 -1
- package/dist/commands/publish.js +2 -2
- package/dist/commands/readiness-report.js +13 -6
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +13 -1
- package/dist/composition-root.js +99 -4
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +28 -8
- package/dist/orchestration/steps/calculate-scores-step.js +24 -11
- package/dist/orchestration/steps/fetch-docs-step.js +8 -7
- package/dist/orchestration/steps/gap-analysis-step.js +8 -7
- package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
- package/dist/orchestration/steps/generate-configs-step.js +261 -51
- package/dist/orchestration/steps/grader-consistency-step.js +7 -4
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/readiness-step.js +5 -6
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
- package/dist/orchestration/steps/run-eval-step.js +8 -7
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/cache.js +36 -8
- package/dist/pipeline/calculate-scores.d.ts +2 -4
- package/dist/pipeline/calculate-scores.js +43 -113
- package/dist/pipeline/checks.js +2 -2
- package/dist/pipeline/compare.js +8 -8
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
- package/dist/pipeline/compiler/assertion-mapper.js +175 -0
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
- package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
- package/dist/pipeline/compiler/config-loader.d.ts +56 -0
- package/dist/pipeline/compiler/config-loader.js +111 -0
- package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
- package/dist/pipeline/compiler/fixture-resolver.js +113 -0
- package/dist/pipeline/compiler/hash.d.ts +11 -0
- package/dist/pipeline/compiler/hash.js +18 -0
- package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
- package/dist/pipeline/compiler/ignore-fields.js +113 -0
- package/dist/pipeline/compiler/index.d.ts +29 -0
- package/dist/pipeline/compiler/index.js +45 -0
- package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
- package/dist/pipeline/compiler/literacy-bridge.js +172 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
- package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/index.d.ts +9 -0
- package/dist/pipeline/compiler/presets/index.js +8 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
- package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
- package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
- package/dist/pipeline/compiler/provider-assembler.js +137 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
- package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
- package/dist/pipeline/compiler/sandbox/index.js +11 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
- package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
- package/dist/pipeline/compiler/scoring-bridge.js +114 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
- package/dist/pipeline/compiler/task-graph-builder.js +291 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
- package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
- package/dist/pipeline/compiler/telemetry/index.js +19 -0
- package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
- package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
- package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
- package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
- package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
- package/dist/pipeline/compiler/variable-resolver.js +115 -0
- package/dist/pipeline/coverage-audit.d.ts +15 -5
- package/dist/pipeline/coverage-audit.js +41 -22
- package/dist/pipeline/eval-constants.d.ts +16 -6
- package/dist/pipeline/eval-constants.js +25 -4
- package/dist/pipeline/eval-fingerprint.d.ts +2 -2
- package/dist/pipeline/eval-fingerprint.js +8 -9
- package/dist/pipeline/expand-tasks.d.ts +19 -10
- package/dist/pipeline/expand-tasks.js +34 -28
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +2 -2
- package/dist/pipeline/generate-configs.d.ts +22 -4
- package/dist/pipeline/generate-configs.js +53 -24
- package/dist/pipeline/grader-api.d.ts +3 -3
- package/dist/pipeline/grader-api.js +5 -12
- package/dist/pipeline/grader-compare-runner.js +20 -27
- package/dist/pipeline/grader-comparison.d.ts +4 -8
- package/dist/pipeline/grader-comparison.js +11 -17
- package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
- package/dist/pipeline/grader-consistency-runner.js +16 -20
- package/dist/pipeline/grader-consistency.d.ts +6 -10
- package/dist/pipeline/grader-consistency.js +13 -32
- package/dist/pipeline/grader-sensitivity-runner.js +7 -5
- package/dist/pipeline/grader-sensitivity.d.ts +2 -6
- package/dist/pipeline/grader-sensitivity.js +10 -10
- package/dist/pipeline/grader-validate-runner.js +7 -5
- package/dist/pipeline/grader-validation.d.ts +2 -6
- package/dist/pipeline/grader-validation.js +14 -22
- package/dist/pipeline/map-request-to-config.js +7 -1
- package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
- package/dist/pipeline/mirror-repo-tasks.js +22 -21
- package/dist/pipeline/normalize-mode.d.ts +49 -0
- package/dist/pipeline/normalize-mode.js +64 -0
- package/dist/pipeline/plan.d.ts +5 -2
- package/dist/pipeline/plan.js +134 -78
- package/dist/pipeline/pr-comment.js +2 -0
- package/dist/pipeline/profile-resolution.d.ts +22 -14
- package/dist/pipeline/profile-resolution.js +41 -19
- package/dist/pipeline/provenance.d.ts +2 -2
- package/dist/pipeline/provenance.js +12 -17
- package/dist/pipeline/release-report.js +4 -4
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/rubric-loader.d.ts +20 -0
- package/dist/pipeline/rubric-loader.js +37 -0
- package/dist/pipeline/validate.d.ts +4 -4
- package/dist/pipeline/validate.js +64 -53
- package/dist/schedules/loader.js +18 -8
- package/dist/scripts/migrate-task-mode.d.ts +24 -0
- package/dist/scripts/migrate-task-mode.js +85 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +15 -15
- package/dist/sinks/loader.js +5 -7
- package/dist/sources.d.ts +7 -7
- package/dist/sources.js +22 -24
- package/dist/webhook/dispatch.js +2 -1
- package/package.json +15 -4
- package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
- package/tasks/literacy/frameworks.task.ts +128 -0
- package/tasks/literacy/functions.task.ts +69 -0
- package/tasks/literacy/groq.task.ts +258 -0
- package/tasks/literacy/nextjs-live.task.ts +75 -0
- package/tasks/literacy/studio-setup.task.ts +131 -0
- package/tasks/literacy/visual-editing.task.ts +146 -0
- package/config/features.yaml +0 -116
- package/config/models.yaml +0 -116
- package/config/prompts.yaml +0 -75
- package/config/rubrics.yaml +0 -81
- package/config/schedules.yaml +0 -43
- package/config/sinks.yaml +0 -54
- package/config/sources.yaml +0 -51
- package/config/thresholds.yaml +0 -49
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
|
@@ -1,8 +1,129 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* repo-validation.ts —
|
|
2
|
+
* repo-validation.ts — Semantic validation for task definitions.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Checks that go beyond Zod schema parsing:
|
|
5
|
+
* - Assertion types are in the curated set
|
|
6
|
+
* - Rubric template names resolve to known templates
|
|
7
|
+
* - Doc ref slugs look reasonable (slugs, not URLs)
|
|
8
|
+
* - Tasks have at least one LLM rubric assertion (recommended)
|
|
9
|
+
* - Tasks have a prompt text (recommended)
|
|
10
|
+
*
|
|
11
|
+
* Apart from duplicate task IDs (reported as errors), these produce warnings — the pipeline can still run
|
|
12
|
+
* with imperfect tasks. Only structural failures (caught by Zod) block.
|
|
13
|
+
*
|
|
14
|
+
* Previously this file re-exported from @sanity/ailf-tasks. That package
|
|
15
|
+
* has been eliminated — all validation logic now lives here.
|
|
16
|
+
*/
|
|
17
|
+
import { CURATED_ASSERTION_TYPES, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Public API
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
/**
 * Run semantic validation on an array of parsed canonical tasks.
 *
 * Errors (any error makes `valid` false):
 *   - a task ID that appears more than once in `tasks`
 *
 * Warnings (never affect `valid`):
 *   - an assertion whose `type` is not in CURATED_ASSERTION_TYPES
 *   - an `llm-rubric` assertion whose `template` is not in RUBRIC_TEMPLATE_NAMES
 *   - a slug-only doc ref (has `slug`, no `id`) that contains "/" or starts
 *     with an http(s) scheme
 *   - a task with no `llm-rubric` assertion
 *   - a task with a missing or empty `prompt.text`
 *
 * @param {Array<object>} tasks - Parsed tasks. Each is read for `id`,
 *   `assertions`, `context.docs` and `prompt.text`; `assertions` and
 *   `context.docs` default to empty lists when absent.
 * @returns {{ valid: boolean, errors: Array<{taskId: *, field: string, message: string}>, warnings: Array<{taskId: *, field: string, message: string}> }}
 *   Collected issues, in the order they were found.
 */
export function validateCanonicalTasks(tasks) {
    // Anchored scheme test: a plain substring search for "http" would also
    // flag legitimate slugs such as "http-api-reference".
    const HTTP_SCHEME = /^https?:/i;
    const errors = [];
    const warnings = [];
    // Check for duplicate IDs (one error per repeated occurrence).
    const seenIds = new Set();
    for (const task of tasks) {
        if (seenIds.has(task.id)) {
            errors.push({
                taskId: task.id,
                field: "id",
                message: `Duplicate task ID "${task.id}"`,
            });
        }
        seenIds.add(task.id);
    }
    for (const task of tasks) {
        const assertions = task.assertions ?? [];
        // Check assertion types and, for llm-rubric assertions, the template name.
        assertions.forEach((assertion, i) => {
            if (!CURATED_ASSERTION_TYPES.includes(assertion.type)) {
                warnings.push({
                    taskId: task.id,
                    field: `assertions[${i}].type`,
                    message: `Unknown assertion type "${assertion.type}". ` +
                        `Valid types: ${CURATED_ASSERTION_TYPES.join(", ")}`,
                });
            }
            // Only rubric assertions that actually carry a template are checked;
            // a rubric without a `template` key is left alone.
            if (assertion.type === "llm-rubric" && "template" in assertion) {
                const template = assertion.template;
                if (!RUBRIC_TEMPLATE_NAMES.includes(template)) {
                    warnings.push({
                        taskId: task.id,
                        field: `assertions[${i}].template`,
                        message: `Unknown rubric template "${template}". ` +
                            `Valid templates: ${RUBRIC_TEMPLATE_NAMES.join(", ")}`,
                    });
                }
            }
        });
        // Check canonical doc refs look reasonable.
        const docs = task.context?.docs ?? [];
        docs.forEach((doc, i) => {
            // Only slug-only refs are inspected; refs that also carry an `id`
            // or whose slug is not a string are skipped.
            const isSlugOnlyRef = "slug" in doc && !("id" in doc) && typeof doc.slug === "string";
            if (!isSlugOnlyRef) {
                return;
            }
            if (doc.slug.includes("/") || HTTP_SCHEME.test(doc.slug)) {
                warnings.push({
                    taskId: task.id,
                    field: `context.docs[${i}].slug`,
                    message: `Slug "${doc.slug}" looks like a URL or path — use 'path' type for paths or 'slug' for document slugs (e.g., "groq-introduction")`,
                });
            }
        });
        // Check task has at least one llm-rubric assertion (recommended but not required).
        const hasLlmRubric = assertions.some((a) => a.type === "llm-rubric");
        if (!hasLlmRubric) {
            warnings.push({
                taskId: task.id,
                field: "assertions",
                message: "No llm-rubric assertion found. Tasks should have at least one scored rubric for meaningful evaluation.",
            });
        }
        // Check prompt text exists (an empty string counts as missing).
        if (!task.prompt?.text) {
            warnings.push({
                taskId: task.id,
                field: "prompt.text",
                message: "No task prompt found in prompt.text. The LLM will receive an empty implementation request.",
            });
        }
    }
    return {
        valid: errors.length === 0,
        errors,
        warnings,
    };
}
|
|
108
|
+
/**
|
|
109
|
+
* Format validation results for console output.
|
|
7
110
|
*/
|
|
8
|
-
export
|
|
111
|
+
/**
 * Render a validation result as newline-separated text for the console.
 *
 * Output consists of an "Errors:" section and a "Warnings:" section, each
 * emitted only when it has entries, with one line per issue. When the result
 * is valid and has no warnings, a single success line is emitted instead.
 *
 * @param {{ valid: boolean, errors: Array<{taskId: *, field: string, message: string}>, warnings: Array<{taskId: *, field: string, message: string}> }} result
 *   The validation result to render.
 * @returns {string} The rendered lines joined with "\n" (empty string when
 *   there is nothing to report).
 */
export function formatValidationResult(result) {
    // Every issue line has the same shape regardless of severity.
    const renderIssue = (issue) => ` [${issue.taskId}] ${issue.field}: ${issue.message}`;
    const sections = [
        ["Errors:", result.errors],
        ["Warnings:", result.warnings],
    ];
    const output = [];
    for (const [heading, issues] of sections) {
        if (issues.length > 0) {
            output.push(heading);
            for (const issue of issues) {
                output.push(renderIssue(issue));
            }
        }
    }
    const isClean = result.valid && result.warnings.length === 0;
    if (isClean) {
        output.push("All tasks pass validation");
    }
    return output.join("\n");
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TaskFileLoader — loads task definitions from TypeScript files.
|
|
3
|
+
*
|
|
4
|
+
* Supplements the existing YAML-based task loading by supporting
|
|
5
|
+
* `*.task.ts` and `*.task.js` files in task directories. Files are
|
|
6
|
+
* loaded via jiti and expected to export GeneralizedTaskDefinition
|
|
7
|
+
* objects authored with `defineTask()`.
|
|
8
|
+
*
|
|
9
|
+
* TS task files export a single task or an array of tasks:
|
|
10
|
+
*
|
|
11
|
+
* ```typescript
|
|
12
|
+
* // single task
|
|
13
|
+
* import { defineTask } from "@sanity/ailf"
|
|
14
|
+
* export default defineTask({ id: "my-task", mode: "literacy", ... })
|
|
15
|
+
*
|
|
16
|
+
* // multiple tasks
|
|
17
|
+
* export default [
|
|
18
|
+
* defineTask({ id: "task-1", mode: "literacy", ... }),
|
|
19
|
+
* defineTask({ id: "task-2", mode: "literacy", ... }),
|
|
20
|
+
* ]
|
|
21
|
+
* ```
|
|
22
|
+
*
|
|
23
|
+
* The loader integrates into the existing RepoTaskSource adapter — TS
|
|
24
|
+
* task files are discovered alongside YAML files in the same directory.
|
|
25
|
+
*
|
|
26
|
+
* @see docs/design-docs/architecture-overhaul/typescript-configuration.md
|
|
27
|
+
*/
|
|
28
|
+
/** A raw task object loaded from a TS file (pre-validation) */
|
|
29
|
+
export interface RawTsTask {
|
|
30
|
+
/** Source file path (for error messages) */
|
|
31
|
+
filePath: string;
|
|
32
|
+
/** The loaded task data (may be a single object or an array) */
|
|
33
|
+
tasks: unknown[];
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Discover TS/JS task files in a directory.
|
|
37
|
+
*
|
|
38
|
+
* Looks for files matching `*.task.ts` or `*.task.js`.
|
|
39
|
+
*
|
|
40
|
+
* @param tasksDir - Absolute path to the tasks directory
|
|
41
|
+
* @returns Array of absolute file paths
|
|
42
|
+
*/
|
|
43
|
+
export declare function discoverTsTaskFiles(tasksDir: string): string[];
|
|
44
|
+
/**
|
|
45
|
+
* Load task definitions from a single TS/JS task file.
|
|
46
|
+
*
|
|
47
|
+
* The file's default export can be:
|
|
48
|
+
* - A single task object → wrapped in an array
|
|
49
|
+
* - An array of task objects → used as-is
|
|
50
|
+
*
|
|
51
|
+
* Returns the raw task data without validation — the caller is
|
|
52
|
+
* responsible for running the result through Zod schemas.
|
|
53
|
+
*
|
|
54
|
+
* @param filePath - Absolute path to the .task.ts or .task.js file
|
|
55
|
+
* @returns The loaded task(s), or throws on load failure
|
|
56
|
+
*/
|
|
57
|
+
export declare function loadTsTaskFile(filePath: string): Promise<RawTsTask>;
|
|
58
|
+
/**
|
|
59
|
+
* Load all TS task files from a directory.
|
|
60
|
+
*
|
|
61
|
+
* @param tasksDir - Absolute path to the tasks directory
|
|
62
|
+
* @returns Array of raw task data from all files
|
|
63
|
+
*/
|
|
64
|
+
export declare function loadAllTsTaskFiles(tasksDir: string): Promise<RawTsTask[]>;
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TaskFileLoader — loads task definitions from TypeScript files.
|
|
3
|
+
*
|
|
4
|
+
* Supplements the existing YAML-based task loading by supporting
|
|
5
|
+
* `*.task.ts` and `*.task.js` files in task directories. Files are
|
|
6
|
+
* loaded via jiti and expected to export GeneralizedTaskDefinition
|
|
7
|
+
* objects authored with `defineTask()`.
|
|
8
|
+
*
|
|
9
|
+
* TS task files export a single task or an array of tasks:
|
|
10
|
+
*
|
|
11
|
+
* ```typescript
|
|
12
|
+
* // single task
|
|
13
|
+
* import { defineTask } from "@sanity/ailf"
|
|
14
|
+
* export default defineTask({ id: "my-task", mode: "literacy", ... })
|
|
15
|
+
*
|
|
16
|
+
* // multiple tasks
|
|
17
|
+
* export default [
|
|
18
|
+
* defineTask({ id: "task-1", mode: "literacy", ... }),
|
|
19
|
+
* defineTask({ id: "task-2", mode: "literacy", ... }),
|
|
20
|
+
* ]
|
|
21
|
+
* ```
|
|
22
|
+
*
|
|
23
|
+
* The loader integrates into the existing RepoTaskSource adapter — TS
|
|
24
|
+
* task files are discovered alongside YAML files in the same directory.
|
|
25
|
+
*
|
|
26
|
+
* @see docs/design-docs/architecture-overhaul/typescript-configuration.md
|
|
27
|
+
*/
|
|
28
|
+
import { existsSync, readdirSync } from "fs";
|
|
29
|
+
import { resolve } from "path";
|
|
30
|
+
import { loadTsConfig } from "../config-sources/ts-config-loader.js";
|
|
31
|
+
/**
 * Discover TS/JS task files in a directory.
 *
 * Looks for entries named `*.task.ts` or `*.task.js`. Dotfiles are skipped,
 * and so are subdirectories that merely happen to carry one of those
 * suffixes — a directory can never be loaded as a task module, so returning
 * it would only defer the failure to load time.
 *
 * @param tasksDir - Absolute path to the tasks directory
 * @returns Array of absolute file paths in default string sort order;
 *          empty when `tasksDir` does not exist
 */
export function discoverTsTaskFiles(tasksDir) {
    if (!existsSync(tasksDir)) {
        return [];
    }
    return readdirSync(tasksDir, { withFileTypes: true })
        // Only directories are rejected: symlinked task files report as
        // symlinks (not files) on a Dirent and must keep being discovered.
        .filter((entry) => !entry.isDirectory())
        .map((entry) => entry.name)
        .filter((name) => !name.startsWith(".") &&
            (name.endsWith(".task.ts") || name.endsWith(".task.js")))
        .sort()
        .map((name) => resolve(tasksDir, name));
}
|
|
47
|
+
/**
 * Load the task definitions exported by one TS/JS task file.
 *
 * The file is loaded through `loadTsConfig`. Its exported value is
 * normalized so callers always receive an array:
 * - an array is passed through unchanged
 * - anything else is wrapped in a one-element array
 *
 * No validation happens here — the caller is expected to run the
 * returned data through the Zod schemas.
 *
 * @param filePath - Absolute path to the .task.ts or .task.js file
 * @returns `{ filePath, tasks }` with the raw, unvalidated task data
 * @throws Error carrying the loader's error message when loading fails
 */
export async function loadTsTaskFile(filePath) {
    const outcome = await loadTsConfig(filePath);
    if (outcome.ok) {
        const exported = outcome.value;
        // Normalize: a lone task object becomes an array of one.
        return {
            filePath,
            tasks: Array.isArray(exported) ? exported : [exported],
        };
    }
    throw new Error(outcome.error);
}
|
|
70
|
+
/**
 * Load every TS/JS task file found in a directory.
 *
 * Files are discovered with `discoverTsTaskFiles` and loaded one at a
 * time, in discovery order, so the result order matches the discovery
 * order. The first file that fails to load rejects the whole call.
 *
 * @param tasksDir - Absolute path to the tasks directory
 * @returns Array of raw task data, one entry per discovered file
 */
export async function loadAllTsTaskFiles(tasksDir) {
    const loaded = [];
    for (const taskFile of discoverTsTaskFiles(tasksDir)) {
        const raw = await loadTsTaskFile(taskFile);
        loaded.push(raw);
    }
    return loaded;
}
|
|
@@ -2,17 +2,17 @@
|
|
|
2
2
|
* Adapter: Load task definitions from tasks/*.yaml files.
|
|
3
3
|
*
|
|
4
4
|
* This adapter reads the raw YAML task definitions (before Promptfoo
|
|
5
|
-
* expansion) and maps them to
|
|
6
|
-
* @sanity/ailf-core. It handles
|
|
7
|
-
* task ID filtering.
|
|
5
|
+
* expansion) and maps them to GeneralizedTaskDefinition
|
|
6
|
+
* (LiteracyTaskDefinition variant) from @sanity/ailf-core. It handles
|
|
7
|
+
* area filtering (filename stem) and task ID filtering.
|
|
8
8
|
*
|
|
9
9
|
* Unlike loadAndExpandTasks() — which produces Promptfoo-specific
|
|
10
10
|
* ExpandedTestEntry objects — this adapter produces domain-level
|
|
11
|
-
*
|
|
11
|
+
* GeneralizedTaskDefinition objects suitable for the pipeline orchestrator.
|
|
12
12
|
*/
|
|
13
|
-
import type { FilterOptions,
|
|
13
|
+
import type { FilterOptions, GeneralizedTaskDefinition, TaskSource } from "../../_vendor/ailf-core/index.d.ts";
|
|
14
14
|
export declare class YamlTaskSource implements TaskSource {
|
|
15
15
|
private readonly rootDir;
|
|
16
16
|
constructor(rootDir: string);
|
|
17
|
-
loadTasks(filter?: FilterOptions): Promise<
|
|
17
|
+
loadTasks(filter?: FilterOptions): Promise<GeneralizedTaskDefinition[]>;
|
|
18
18
|
}
|
|
@@ -2,13 +2,13 @@
|
|
|
2
2
|
* Adapter: Load task definitions from tasks/*.yaml files.
|
|
3
3
|
*
|
|
4
4
|
* This adapter reads the raw YAML task definitions (before Promptfoo
|
|
5
|
-
* expansion) and maps them to
|
|
6
|
-
* @sanity/ailf-core. It handles
|
|
7
|
-
* task ID filtering.
|
|
5
|
+
* expansion) and maps them to GeneralizedTaskDefinition
|
|
6
|
+
* (LiteracyTaskDefinition variant) from @sanity/ailf-core. It handles
|
|
7
|
+
* area filtering (filename stem) and task ID filtering.
|
|
8
8
|
*
|
|
9
9
|
* Unlike loadAndExpandTasks() — which produces Promptfoo-specific
|
|
10
10
|
* ExpandedTestEntry objects — this adapter produces domain-level
|
|
11
|
-
*
|
|
11
|
+
* GeneralizedTaskDefinition objects suitable for the pipeline orchestrator.
|
|
12
12
|
*/
|
|
13
13
|
import { existsSync, readdirSync, readFileSync } from "fs";
|
|
14
14
|
import { resolve } from "path";
|
|
@@ -55,7 +55,7 @@ export class YamlTaskSource {
|
|
|
55
55
|
!filter.taskIds.includes(entry.id)) {
|
|
56
56
|
continue;
|
|
57
57
|
}
|
|
58
|
-
definitions.push(
|
|
58
|
+
definitions.push(mapToLiteracyTask(entry, featureArea));
|
|
59
59
|
}
|
|
60
60
|
}
|
|
61
61
|
return definitions;
|
|
@@ -65,29 +65,32 @@ export class YamlTaskSource {
|
|
|
65
65
|
// Mapping helpers
|
|
66
66
|
// ---------------------------------------------------------------------------
|
|
67
67
|
/**
|
|
68
|
-
* Map a raw YAML entry to a
|
|
68
|
+
* Map a raw YAML entry directly to a LiteracyTaskDefinition.
|
|
69
69
|
*
|
|
70
|
-
* Renames snake_case YAML keys to
|
|
71
|
-
* the task prompt from `vars.task`. Additional vars beyond `task`
|
|
72
|
-
* `docs` are collected into `
|
|
70
|
+
* Renames snake_case YAML keys to the generalized type's field names and
|
|
71
|
+
* extracts the task prompt from `vars.task`. Additional vars beyond `task`
|
|
72
|
+
* and `docs` are collected into `prompt.vars`.
|
|
73
73
|
*/
|
|
74
|
-
function
|
|
74
|
+
function mapToLiteracyTask(raw, featureArea) {
|
|
75
75
|
const { task, docs: _docs, ...rest } = (raw.vars ?? {});
|
|
76
|
-
const
|
|
76
|
+
const docs = (raw.canonical_docs ?? [])
|
|
77
77
|
.map(mapCanonicalDoc)
|
|
78
78
|
.filter((d) => d !== null);
|
|
79
79
|
const extraVars = Object.keys(rest).length > 0 ? rest : undefined;
|
|
80
80
|
return {
|
|
81
|
+
mode: "literacy",
|
|
81
82
|
id: raw.id,
|
|
82
|
-
|
|
83
|
-
featureArea,
|
|
84
|
-
|
|
85
|
-
|
|
83
|
+
title: raw.description,
|
|
84
|
+
area: featureArea,
|
|
85
|
+
prompt: {
|
|
86
|
+
text: typeof task === "string" ? task : "",
|
|
87
|
+
...(extraVars ? { vars: extraVars } : {}),
|
|
88
|
+
},
|
|
89
|
+
context: { docs },
|
|
86
90
|
referenceSolution: raw.reference_solution ?? "",
|
|
87
91
|
docCoverage: raw.doc_coverage ?? false,
|
|
88
92
|
assertions: (raw.assert ?? []),
|
|
89
93
|
...(raw.baseline ? { baseline: raw.baseline } : {}),
|
|
90
|
-
...(extraVars ? { extraVars } : {}),
|
|
91
94
|
};
|
|
92
95
|
}
|
|
93
96
|
// ---------------------------------------------------------------------------
|
package/dist/cli.js
CHANGED
|
@@ -157,8 +157,6 @@ import { createValidateTasksCommand } from "./commands/validate-tasks.js";
|
|
|
157
157
|
program.addCommand(createValidateTasksCommand().helpGroup(CommandGroup.SetupConfig));
|
|
158
158
|
import { createFetchDocsCommand } from "./commands/fetch-docs.js";
|
|
159
159
|
program.addCommand(createFetchDocsCommand().helpGroup(CommandGroup.SetupConfig));
|
|
160
|
-
import { createGenerateConfigsCommand } from "./commands/generate-configs.js";
|
|
161
|
-
program.addCommand(createGenerateConfigsCommand().helpGroup(CommandGroup.SetupConfig));
|
|
162
160
|
import { createCacheCommand } from "./commands/cache.js";
|
|
163
161
|
program.addCommand(createCacheCommand().helpGroup(CommandGroup.SetupConfig));
|
|
164
162
|
// ── Pipeline Internals ────────────────────────────────────────────────
|
|
@@ -11,8 +11,11 @@ import { Command } from "commander";
|
|
|
11
11
|
import { compareBaseline, listBaselines, saveBaseline, } from "../pipeline/baseline.js";
|
|
12
12
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
13
13
|
const ROOT = resolve(__dirname, "../..");
|
|
14
|
+
// CLI command name — kept as a constant to centralize the string literal.
|
|
15
|
+
// "baseline" here refers to score baseline snapshots, not the legacy eval mode.
|
|
16
|
+
const CMD_NAME = "baseline";
|
|
14
17
|
export function createBaselineCommand() {
|
|
15
|
-
const cmd = new Command(
|
|
18
|
+
const cmd = new Command(CMD_NAME).description("Manage historical baseline snapshots of evaluation scores");
|
|
16
19
|
// -----------------------------------------------------------------------
|
|
17
20
|
// baseline save
|
|
18
21
|
// -----------------------------------------------------------------------
|
|
@@ -3,10 +3,13 @@
|
|
|
3
3
|
* against task files to produce a documentation coverage audit.
|
|
4
4
|
*
|
|
5
5
|
*/
|
|
6
|
+
import { InMemoryPluginRegistry } from "../_vendor/ailf-core/index.js";
|
|
6
7
|
import { Command } from "commander";
|
|
7
8
|
import { dirname, resolve } from "path";
|
|
8
9
|
import { fileURLToPath } from "url";
|
|
9
10
|
import { countReferencedDocs, formatCoverageConsole, formatCoverageMarkdown, runCoverageAudit, } from "../pipeline/coverage-audit.js";
|
|
11
|
+
import { createLiteracyModeBase } from "../pipeline/compiler/mode-bases/index.js";
|
|
12
|
+
import { createSanityLiteracyPreset } from "../pipeline/compiler/presets/index.js";
|
|
10
13
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
11
14
|
const ROOT = resolve(__dirname, "..", "..");
|
|
12
15
|
export function createCoverageAuditCommand() {
|
|
@@ -15,7 +18,12 @@ export function createCoverageAuditCommand() {
|
|
|
15
18
|
.option("--format <fmt>", "Output format: table, md, markdown")
|
|
16
19
|
.option("--json", "Output raw JSON", false)
|
|
17
20
|
.action(async (opts) => {
|
|
18
|
-
|
|
21
|
+
// Build a registry with mode base + preset so coverage audit works
|
|
22
|
+
// even when config/features.ts is empty (preset is source of truth).
|
|
23
|
+
const registry = new InMemoryPluginRegistry();
|
|
24
|
+
registry.registerModeBase(createLiteracyModeBase());
|
|
25
|
+
registry.registerPreset(createSanityLiteracyPreset({ rootDir: ROOT }));
|
|
26
|
+
const report = runCoverageAudit(ROOT, { registry });
|
|
19
27
|
if (!report) {
|
|
20
28
|
console.error("❌ Coverage audit failed. Ensure config/features.yaml exists and is valid.");
|
|
21
29
|
process.exit(1);
|
|
@@ -23,6 +23,7 @@ import { TASK_FILE_NAMES } from "../_vendor/ailf-core/index.js";
|
|
|
23
23
|
import { buildPipelinePlan, buildSimpleCommandPlan, } from "../pipeline/plan.js";
|
|
24
24
|
import { formatPlanConsole, formatPlanJson } from "../pipeline/plan-format.js";
|
|
25
25
|
import { computeResolvedOptions } from "./pipeline-action.js";
|
|
26
|
+
import { LiteracyVariant } from "../pipeline/normalize-mode.js";
|
|
26
27
|
// ---------------------------------------------------------------------------
|
|
27
28
|
// Registry
|
|
28
29
|
// ---------------------------------------------------------------------------
|
|
@@ -84,8 +85,8 @@ const EXPLAIN_REGISTRY = {
|
|
|
84
85
|
filesCreated: ["results/latest/score-summary.json"],
|
|
85
86
|
filesRead: [
|
|
86
87
|
"results/latest/eval-results.json",
|
|
87
|
-
"config/rubrics.
|
|
88
|
-
"config/models.
|
|
88
|
+
"config/rubrics.ts",
|
|
89
|
+
"config/models.ts",
|
|
89
90
|
],
|
|
90
91
|
steps: [
|
|
91
92
|
{
|
|
@@ -138,12 +139,12 @@ const EXPLAIN_REGISTRY = {
|
|
|
138
139
|
},
|
|
139
140
|
"coverage-audit": {
|
|
140
141
|
description: "Cross-reference feature registry against evaluation tasks for coverage gaps",
|
|
141
|
-
filesRead: ["config/features.
|
|
142
|
+
filesRead: ["config/features.ts", "tasks/*.{yaml,task.ts,task.js}"],
|
|
142
143
|
steps: [
|
|
143
144
|
{
|
|
144
145
|
cacheStatus: "miss",
|
|
145
146
|
name: "Load feature registry",
|
|
146
|
-
reason: "Parse config/features.
|
|
147
|
+
reason: "Parse config/features.ts for product feature list",
|
|
147
148
|
willRun: true,
|
|
148
149
|
},
|
|
149
150
|
{
|
|
@@ -201,7 +202,7 @@ const EXPLAIN_REGISTRY = {
|
|
|
201
202
|
"fetch-docs": {
|
|
202
203
|
description: "Fetch documentation from Sanity CMS and generate canonical context files",
|
|
203
204
|
filesCreated: ["contexts/canonical/*.md"],
|
|
204
|
-
filesRead: ["config/sources.
|
|
205
|
+
filesRead: ["config/sources.ts", "config/models.ts"],
|
|
205
206
|
steps: [
|
|
206
207
|
{
|
|
207
208
|
cacheStatus: "miss",
|
|
@@ -224,7 +225,7 @@ const EXPLAIN_REGISTRY = {
|
|
|
224
225
|
],
|
|
225
226
|
},
|
|
226
227
|
"generate-configs": {
|
|
227
|
-
description: "Generate Promptfoo config files from models.
|
|
228
|
+
description: "Generate Promptfoo config files from models.ts and task definitions",
|
|
228
229
|
filesCreated: [
|
|
229
230
|
"promptfooconfig.yaml",
|
|
230
231
|
"promptfooconfig.observed.yaml",
|
|
@@ -232,16 +233,16 @@ const EXPLAIN_REGISTRY = {
|
|
|
232
233
|
"tasks/.expanded.yaml",
|
|
233
234
|
],
|
|
234
235
|
filesRead: [
|
|
235
|
-
"config/models.
|
|
236
|
-
"config/prompts.
|
|
237
|
-
"config/rubrics.
|
|
238
|
-
"config/sources.
|
|
236
|
+
"config/models.ts",
|
|
237
|
+
"config/prompts.ts",
|
|
238
|
+
"config/rubrics.ts",
|
|
239
|
+
"config/sources.ts",
|
|
239
240
|
],
|
|
240
241
|
steps: [
|
|
241
242
|
{
|
|
242
243
|
cacheStatus: "miss",
|
|
243
244
|
name: "Load models",
|
|
244
|
-
reason: "Parse config/models.
|
|
245
|
+
reason: "Parse config/models.ts for active model list",
|
|
245
246
|
willRun: true,
|
|
246
247
|
},
|
|
247
248
|
{
|
|
@@ -262,7 +263,7 @@ const EXPLAIN_REGISTRY = {
|
|
|
262
263
|
description: "Grader reliability tools (consistency, compare, sensitivity, validate)",
|
|
263
264
|
filesRead: [
|
|
264
265
|
"results/latest/eval-results.json",
|
|
265
|
-
"config/rubrics.
|
|
266
|
+
"config/rubrics.ts",
|
|
266
267
|
"canonical/reference-solutions/",
|
|
267
268
|
],
|
|
268
269
|
steps: [
|
|
@@ -369,7 +370,7 @@ const EXPLAIN_REGISTRY = {
|
|
|
369
370
|
filesRead: [
|
|
370
371
|
"results/latest/score-summary.json",
|
|
371
372
|
"results/latest/gap-analysis.json",
|
|
372
|
-
"config/thresholds.
|
|
373
|
+
"config/thresholds.ts",
|
|
373
374
|
"results/baselines/",
|
|
374
375
|
],
|
|
375
376
|
filesCreated: ["results/latest/readiness-report.md"],
|
|
@@ -377,7 +378,7 @@ const EXPLAIN_REGISTRY = {
|
|
|
377
378
|
{
|
|
378
379
|
cacheStatus: "miss",
|
|
379
380
|
name: "Load scores + thresholds",
|
|
380
|
-
reason: "Read score-summary.json and thresholds.
|
|
381
|
+
reason: "Read score-summary.json and thresholds.ts for gate evaluation",
|
|
381
382
|
willRun: true,
|
|
382
383
|
},
|
|
383
384
|
{
|
|
@@ -395,18 +396,18 @@ const EXPLAIN_REGISTRY = {
|
|
|
395
396
|
],
|
|
396
397
|
},
|
|
397
398
|
validate: {
|
|
398
|
-
description: "Validate all
|
|
399
|
+
description: "Validate all config files, task definitions, reference solutions, and environment",
|
|
399
400
|
filesRead: [
|
|
400
|
-
"config/models.
|
|
401
|
-
"config/rubrics.
|
|
402
|
-
"config/features.
|
|
403
|
-
"config/thresholds.
|
|
401
|
+
"config/models.ts",
|
|
402
|
+
"config/rubrics.ts",
|
|
403
|
+
"config/features.ts",
|
|
404
|
+
"config/thresholds.ts",
|
|
404
405
|
],
|
|
405
406
|
steps: [
|
|
406
407
|
{
|
|
407
408
|
cacheStatus: "miss",
|
|
408
409
|
name: "Validate configuration",
|
|
409
|
-
reason: "Parse all
|
|
410
|
+
reason: "Parse all config files through Zod schemas, cross-reference mappings",
|
|
410
411
|
willRun: true,
|
|
411
412
|
},
|
|
412
413
|
{
|
|
@@ -454,12 +455,12 @@ const EXPLAIN_REGISTRY = {
|
|
|
454
455
|
},
|
|
455
456
|
"weekly-digest": {
|
|
456
457
|
description: "Generate and deliver a weekly evaluation trend digest via Slack",
|
|
457
|
-
filesRead: ["config/schedules.
|
|
458
|
+
filesRead: ["config/schedules.ts", "config/sinks.ts"],
|
|
458
459
|
steps: [
|
|
459
460
|
{
|
|
460
461
|
cacheStatus: "miss",
|
|
461
462
|
name: "Load digest config",
|
|
462
|
-
reason: "Read schedules.
|
|
463
|
+
reason: "Read schedules.ts for lookback window and delivery targets",
|
|
463
464
|
willRun: true,
|
|
464
465
|
},
|
|
465
466
|
{
|
|
@@ -670,7 +671,7 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
|
|
|
670
671
|
graderReplications: raw.graderReplications,
|
|
671
672
|
header: raw.header ?? [],
|
|
672
673
|
headers: raw.headers ?? [],
|
|
673
|
-
mode: raw.mode ??
|
|
674
|
+
mode: raw.mode ?? LiteracyVariant.FULL,
|
|
674
675
|
output: raw.output,
|
|
675
676
|
promptfooUrl: raw.promptfooUrl,
|
|
676
677
|
publish: raw.publish,
|
|
@@ -714,6 +715,7 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
|
|
|
714
715
|
gapAnalysisEnabled: resolved.gapAnalysisEnabled,
|
|
715
716
|
graderReplications: resolved.graderReplications,
|
|
716
717
|
mode: resolved.mode,
|
|
718
|
+
variant: resolved.variant,
|
|
717
719
|
noCache: resolved.noCache,
|
|
718
720
|
publishEnabled: resolved.publishEnabled,
|
|
719
721
|
readinessEnabled: resolved.readinessEnabled,
|
|
@@ -41,7 +41,7 @@ async function executeFetchDocs(opts) {
|
|
|
41
41
|
// Build a minimal ResolvedConfig for the composition root
|
|
42
42
|
const ctx = createAppContext({
|
|
43
43
|
rootDir: ROOT,
|
|
44
|
-
mode: "
|
|
44
|
+
mode: "literacy",
|
|
45
45
|
noAutoScope: false,
|
|
46
46
|
skipFetch: false,
|
|
47
47
|
skipEval: true,
|
|
@@ -83,7 +83,8 @@ async function executeFetchDocs(opts) {
|
|
|
83
83
|
}
|
|
84
84
|
// Canonical contexts — same code path as the pipeline
|
|
85
85
|
const tasks = await ctx.taskSource.loadTasks();
|
|
86
|
-
|
|
86
|
+
// Bridge: narrow to literacy tasks with docs (only literacy tasks have context.docs)
|
|
87
|
+
const tasksWithDocs = tasks.filter((t) => t.mode === "literacy" && (t.context?.docs?.length ?? 0) > 0);
|
|
87
88
|
if (tasksWithDocs.length > 0) {
|
|
88
89
|
console.log("\nGenerating canonical (gold-retrieval) contexts...\n");
|
|
89
90
|
const result = await fetcher.fetch(tasksWithDocs, resolvedSource);
|
package/dist/commands/init.d.ts
CHANGED
|
@@ -5,12 +5,14 @@
|
|
|
5
5
|
* task files. The generated files are ready-to-edit starting points —
|
|
6
6
|
* not live evaluation tasks.
|
|
7
7
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
8
|
+
* TypeScript output (default) uses define* helpers from @sanity/ailf-core
|
|
9
|
+
* for full IDE autocomplete and type checking. YAML output preserves
|
|
10
|
+
* inline comments from the source files. JSON output is a plain
|
|
11
|
+
* serialization of the parsed data.
|
|
11
12
|
*
|
|
12
13
|
* Usage:
|
|
13
|
-
* ailf init #
|
|
14
|
+
* ailf init # TypeScript output (default)
|
|
15
|
+
* ailf init --output-format yaml # YAML output
|
|
14
16
|
* ailf init --output-format json # JSON output
|
|
15
17
|
* ailf init --force # overwrite existing files
|
|
16
18
|
* ailf init --path ./my-dir # target a specific directory
|