@sanity/ailf 0.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/config/features.ts +23 -0
- package/config/models.ts +95 -0
- package/config/prompts.ts +16 -0
- package/config/rubrics.ts +225 -0
- package/config/schedules.ts +47 -0
- package/config/sinks.ts +37 -0
- package/config/sources.ts +21 -0
- package/config/thresholds.ts +61 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
- package/dist/_vendor/ailf-core/config-helpers.js +170 -0
- package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
- package/dist/_vendor/ailf-core/env-helper.js +45 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/examples/index.js +25 -0
- package/dist/_vendor/ailf-core/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/index.js +5 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
- package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
- package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
- package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
- package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/services/index.js +2 -1
- package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
- package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
- package/dist/_vendor/ailf-core/services/scoring.js +25 -15
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
- package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
- package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
- package/dist/_vendor/ailf-core/types/index.js +8 -1
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
- package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
- package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
- package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
- package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
- package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
- package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
- package/dist/_vendor/ailf-core/types/trace.js +18 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
- package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
- package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
- package/dist/_vendor/ailf-shared/index.d.ts +0 -1
- package/dist/_vendor/ailf-shared/index.js +0 -1
- package/dist/adapters/api-client/build-request.js +14 -13
- package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
- package/dist/adapters/config-sources/file-config-adapter.js +39 -12
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
- package/dist/adapters/config-sources/ts-config-loader.js +141 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
- package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
- package/dist/adapters/task-sources/index.d.ts +3 -2
- package/dist/adapters/task-sources/index.js +3 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
- package/dist/adapters/task-sources/repo-schemas.js +227 -19
- package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
- package/dist/adapters/task-sources/repo-task-source.js +92 -80
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
- package/dist/adapters/task-sources/task-file-loader.js +83 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
- package/dist/adapters/task-sources/yaml-task-source.js +19 -16
- package/dist/cli.js +0 -2
- package/dist/commands/baseline.js +4 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/coverage-audit.js +9 -1
- package/dist/commands/explain-handler.js +25 -23
- package/dist/commands/fetch-docs.js +3 -2
- package/dist/commands/generate-configs.js +1 -1
- package/dist/commands/init.d.ts +6 -4
- package/dist/commands/init.js +302 -23
- package/dist/commands/interactive.js +11 -7
- package/dist/commands/pipeline-action.d.ts +2 -0
- package/dist/commands/pipeline-action.js +16 -6
- package/dist/commands/pipeline.d.ts +1 -0
- package/dist/commands/pipeline.js +4 -2
- package/dist/commands/pr-comment.js +1 -1
- package/dist/commands/publish.js +2 -2
- package/dist/commands/readiness-report.js +13 -6
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +13 -1
- package/dist/composition-root.js +99 -4
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +28 -8
- package/dist/orchestration/steps/calculate-scores-step.js +24 -11
- package/dist/orchestration/steps/fetch-docs-step.js +8 -7
- package/dist/orchestration/steps/gap-analysis-step.js +8 -7
- package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
- package/dist/orchestration/steps/generate-configs-step.js +261 -51
- package/dist/orchestration/steps/grader-consistency-step.js +7 -4
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/readiness-step.js +5 -6
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
- package/dist/orchestration/steps/run-eval-step.js +8 -7
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/cache.js +36 -8
- package/dist/pipeline/calculate-scores.d.ts +2 -4
- package/dist/pipeline/calculate-scores.js +43 -113
- package/dist/pipeline/checks.js +2 -2
- package/dist/pipeline/compare.js +8 -8
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
- package/dist/pipeline/compiler/assertion-mapper.js +175 -0
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
- package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
- package/dist/pipeline/compiler/config-loader.d.ts +56 -0
- package/dist/pipeline/compiler/config-loader.js +111 -0
- package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
- package/dist/pipeline/compiler/fixture-resolver.js +113 -0
- package/dist/pipeline/compiler/hash.d.ts +11 -0
- package/dist/pipeline/compiler/hash.js +18 -0
- package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
- package/dist/pipeline/compiler/ignore-fields.js +113 -0
- package/dist/pipeline/compiler/index.d.ts +29 -0
- package/dist/pipeline/compiler/index.js +45 -0
- package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
- package/dist/pipeline/compiler/literacy-bridge.js +172 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
- package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/index.d.ts +9 -0
- package/dist/pipeline/compiler/presets/index.js +8 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
- package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
- package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
- package/dist/pipeline/compiler/provider-assembler.js +137 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
- package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
- package/dist/pipeline/compiler/sandbox/index.js +11 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
- package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
- package/dist/pipeline/compiler/scoring-bridge.js +114 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
- package/dist/pipeline/compiler/task-graph-builder.js +291 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
- package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
- package/dist/pipeline/compiler/telemetry/index.js +19 -0
- package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
- package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
- package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
- package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
- package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
- package/dist/pipeline/compiler/variable-resolver.js +115 -0
- package/dist/pipeline/coverage-audit.d.ts +15 -5
- package/dist/pipeline/coverage-audit.js +41 -22
- package/dist/pipeline/eval-constants.d.ts +16 -6
- package/dist/pipeline/eval-constants.js +25 -4
- package/dist/pipeline/eval-fingerprint.d.ts +2 -2
- package/dist/pipeline/eval-fingerprint.js +8 -9
- package/dist/pipeline/expand-tasks.d.ts +19 -10
- package/dist/pipeline/expand-tasks.js +34 -28
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +2 -2
- package/dist/pipeline/generate-configs.d.ts +22 -4
- package/dist/pipeline/generate-configs.js +53 -24
- package/dist/pipeline/grader-api.d.ts +3 -3
- package/dist/pipeline/grader-api.js +5 -12
- package/dist/pipeline/grader-compare-runner.js +20 -27
- package/dist/pipeline/grader-comparison.d.ts +4 -8
- package/dist/pipeline/grader-comparison.js +11 -17
- package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
- package/dist/pipeline/grader-consistency-runner.js +16 -20
- package/dist/pipeline/grader-consistency.d.ts +6 -10
- package/dist/pipeline/grader-consistency.js +13 -32
- package/dist/pipeline/grader-sensitivity-runner.js +7 -5
- package/dist/pipeline/grader-sensitivity.d.ts +2 -6
- package/dist/pipeline/grader-sensitivity.js +10 -10
- package/dist/pipeline/grader-validate-runner.js +7 -5
- package/dist/pipeline/grader-validation.d.ts +2 -6
- package/dist/pipeline/grader-validation.js +14 -22
- package/dist/pipeline/map-request-to-config.js +7 -1
- package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
- package/dist/pipeline/mirror-repo-tasks.js +22 -21
- package/dist/pipeline/normalize-mode.d.ts +49 -0
- package/dist/pipeline/normalize-mode.js +64 -0
- package/dist/pipeline/plan.d.ts +5 -2
- package/dist/pipeline/plan.js +134 -78
- package/dist/pipeline/pr-comment.js +2 -0
- package/dist/pipeline/profile-resolution.d.ts +22 -14
- package/dist/pipeline/profile-resolution.js +41 -19
- package/dist/pipeline/provenance.d.ts +2 -2
- package/dist/pipeline/provenance.js +12 -17
- package/dist/pipeline/release-report.js +4 -4
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/rubric-loader.d.ts +20 -0
- package/dist/pipeline/rubric-loader.js +37 -0
- package/dist/pipeline/validate.d.ts +4 -4
- package/dist/pipeline/validate.js +64 -53
- package/dist/schedules/loader.js +18 -8
- package/dist/scripts/migrate-task-mode.d.ts +24 -0
- package/dist/scripts/migrate-task-mode.js +85 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +15 -15
- package/dist/sinks/loader.js +5 -7
- package/dist/sources.d.ts +7 -7
- package/dist/sources.js +22 -24
- package/dist/webhook/dispatch.js +2 -1
- package/package.json +15 -4
- package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
- package/tasks/literacy/frameworks.task.ts +128 -0
- package/tasks/literacy/functions.task.ts +69 -0
- package/tasks/literacy/groq.task.ts +258 -0
- package/tasks/literacy/nextjs-live.task.ts +75 -0
- package/tasks/literacy/studio-setup.task.ts +131 -0
- package/tasks/literacy/visual-editing.task.ts +146 -0
- package/config/features.yaml +0 -116
- package/config/models.yaml +0 -116
- package/config/prompts.yaml +0 -75
- package/config/rubrics.yaml +0 -81
- package/config/schedules.yaml +0 -43
- package/config/sinks.yaml +0 -54
- package/config/sources.yaml +0 -51
- package/config/thresholds.yaml +0 -49
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
|
@@ -99,11 +99,13 @@ export function formatValidationReport(result) {
|
|
|
99
99
|
const sep = "|------------------|-------|-------------|-----------|--------|-------|";
|
|
100
100
|
lines.push(h);
|
|
101
101
|
lines.push(sep);
|
|
102
|
-
const dims = [
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
102
|
+
const dims = Object.entries(result.perDimension).map(([key, data]) => ({
|
|
103
|
+
data,
|
|
104
|
+
name: key
|
|
105
|
+
.split(/[-_]/)
|
|
106
|
+
.map((w) => w.charAt(0).toUpperCase() + w.slice(1))
|
|
107
|
+
.join(" "),
|
|
108
|
+
}));
|
|
107
109
|
for (const { data, name } of dims) {
|
|
108
110
|
const quality = classifyCorrelation(data.correlation);
|
|
109
111
|
const biasStr = data.bias > 0 ? `+${data.bias}` : `${data.bias}`;
|
|
@@ -63,12 +63,8 @@ export interface GraderValidation {
|
|
|
63
63
|
overallMae: number;
|
|
64
64
|
/** Whether the grader passes the MAE threshold (default: MAE < 10) */
|
|
65
65
|
passesThreshold: boolean;
|
|
66
|
-
/** Per-dimension validity metrics */
|
|
67
|
-
perDimension:
|
|
68
|
-
taskCompletion: DimensionValidity;
|
|
69
|
-
codeCorrectness: DimensionValidity;
|
|
70
|
-
docCoverage: DimensionValidity;
|
|
71
|
-
};
|
|
66
|
+
/** Per-dimension validity metrics (keyed by dimension name) */
|
|
67
|
+
perDimension: Record<string, DimensionValidity>;
|
|
72
68
|
/** Total number of (grader, human) score pairs analyzed */
|
|
73
69
|
totalObservations: number;
|
|
74
70
|
}
|
|
@@ -77,11 +77,7 @@ export function validateGrader(grades, graderModel, options) {
|
|
|
77
77
|
overallCorrelation: 0,
|
|
78
78
|
overallMae: 0,
|
|
79
79
|
passesThreshold: true,
|
|
80
|
-
perDimension: {
|
|
81
|
-
codeCorrectness: { bias: 0, correlation: 0, count: 0, mae: 0 },
|
|
82
|
-
docCoverage: { bias: 0, correlation: 0, count: 0, mae: 0 },
|
|
83
|
-
taskCompletion: { bias: 0, correlation: 0, count: 0, mae: 0 },
|
|
84
|
-
},
|
|
80
|
+
perDimension: {},
|
|
85
81
|
totalObservations: 0,
|
|
86
82
|
};
|
|
87
83
|
}
|
|
@@ -90,28 +86,24 @@ export function validateGrader(grades, graderModel, options) {
|
|
|
90
86
|
grader: g.graderScore,
|
|
91
87
|
human: g.humanScore,
|
|
92
88
|
}));
|
|
93
|
-
// Group by dimension
|
|
94
|
-
const byDimension = {
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
.filter((g) => g.dimension === "taskCompletion")
|
|
103
|
-
.map((g) => ({ grader: g.graderScore, human: g.humanScore })),
|
|
104
|
-
};
|
|
89
|
+
// Group by dimension dynamically
|
|
90
|
+
const byDimension = {};
|
|
91
|
+
for (const g of grades) {
|
|
92
|
+
;
|
|
93
|
+
(byDimension[g.dimension] ??= []).push({
|
|
94
|
+
grader: g.graderScore,
|
|
95
|
+
human: g.humanScore,
|
|
96
|
+
});
|
|
97
|
+
}
|
|
105
98
|
// Overall metrics
|
|
106
99
|
const overallMae = computeMae(allPairs);
|
|
107
100
|
const overallCorrelation = Math.round(pearsonCorrelation(allPairs.map((p) => p.grader), allPairs.map((p) => p.human)) * 100) / 100;
|
|
108
101
|
const overallBias = computeBias(allPairs);
|
|
109
102
|
// Per-dimension metrics
|
|
110
|
-
const perDimension = {
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
};
|
|
103
|
+
const perDimension = {};
|
|
104
|
+
for (const [dim, dimPairs] of Object.entries(byDimension)) {
|
|
105
|
+
perDimension[dim] = computeDimensionValidity(dimPairs);
|
|
106
|
+
}
|
|
115
107
|
// Find largest disagreements
|
|
116
108
|
const disagreements = grades
|
|
117
109
|
.map((g) => ({
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { normalizeMode } from "./normalize-mode.js";
|
|
1
2
|
/**
|
|
2
3
|
* Map a PipelineRequest to a ResolvedConfig.
|
|
3
4
|
*
|
|
@@ -16,13 +17,17 @@
|
|
|
16
17
|
* with `publish: false`.
|
|
17
18
|
*/
|
|
18
19
|
export function mapRequestToConfig(request, rootDir) {
|
|
20
|
+
// Normalize mode so downstream pipeline code only sees canonical names.
|
|
21
|
+
// The API may receive legacy names ("baseline", "full") from older clients.
|
|
22
|
+
const { mode, variant } = normalizeMode(request.mode ?? "full");
|
|
19
23
|
// API-triggered evaluations (identified by jobId) default to publish: true.
|
|
20
24
|
// Without this, the job's reportId is always null and GET /v1/reports/:id
|
|
21
25
|
// has nothing to return.
|
|
22
26
|
const publishDefault = !!request.jobId;
|
|
23
27
|
return {
|
|
24
28
|
rootDir,
|
|
25
|
-
mode
|
|
29
|
+
mode,
|
|
30
|
+
variant,
|
|
26
31
|
debug: mapDebug(request.debug),
|
|
27
32
|
areas: request.areas,
|
|
28
33
|
tasks: request.tasks,
|
|
@@ -63,6 +68,7 @@ export function mapRequestToConfig(request, rootDir) {
|
|
|
63
68
|
jobId: request.jobId,
|
|
64
69
|
remote: false,
|
|
65
70
|
apiUrl: "https://ailf-api.sanity.build",
|
|
71
|
+
presets: request.presets,
|
|
66
72
|
};
|
|
67
73
|
}
|
|
68
74
|
function mapDebug(debug) {
|
|
@@ -13,12 +13,12 @@
|
|
|
13
13
|
* @see docs/exec-plans/tasks-as-content/phase-5-content-lake-mirroring.md
|
|
14
14
|
*/
|
|
15
15
|
import type { SanityClient } from "@sanity/client";
|
|
16
|
-
import { type
|
|
16
|
+
import { type LiteracyTaskDefinition, type Logger } from "../_vendor/ailf-core/index.d.ts";
|
|
17
17
|
export interface MirrorOptions {
|
|
18
18
|
/** Sanity client with write access */
|
|
19
19
|
client: SanityClient;
|
|
20
20
|
/** Tasks to mirror (already loaded from repo) */
|
|
21
|
-
tasks:
|
|
21
|
+
tasks: LiteracyTaskDefinition[];
|
|
22
22
|
/** Git context for origin provenance */
|
|
23
23
|
git: GitContext;
|
|
24
24
|
/** If true, log what would be done without writing */
|
|
@@ -58,7 +58,7 @@ export interface MirrorResult {
|
|
|
58
58
|
skipped: number;
|
|
59
59
|
/** Feature areas auto-created */
|
|
60
60
|
areasCreated: string[];
|
|
61
|
-
/**
|
|
61
|
+
/** Context doc slugs that failed to resolve */
|
|
62
62
|
unresolvedSlugs: string[];
|
|
63
63
|
/** Errors (non-fatal — mirror continues) */
|
|
64
64
|
errors: string[];
|
|
@@ -70,7 +70,7 @@ export interface MirrorResult {
|
|
|
70
70
|
* 1. Compute deterministic document ID
|
|
71
71
|
* 2. Compute content hash of the task definition
|
|
72
72
|
* 3. Check if mirror document exists with same hash → skip if unchanged
|
|
73
|
-
* 4. Resolve
|
|
73
|
+
* 4. Resolve context doc slugs → Sanity references
|
|
74
74
|
* 5. Auto-create feature areas if needed
|
|
75
75
|
* 6. Upsert the ailf.task document with origin block
|
|
76
76
|
*/
|
|
@@ -90,15 +90,15 @@ export declare function detectGitContext(repoTasksPath: string): Promise<GitCont
|
|
|
90
90
|
*/
|
|
91
91
|
export declare function mirrorDocId(owner: string, repo: string, taskId: string): string;
|
|
92
92
|
/**
|
|
93
|
-
* Compute a content hash of a
|
|
93
|
+
* Compute a content hash of a LiteracyTaskDefinition for change detection.
|
|
94
94
|
*
|
|
95
95
|
* Includes all fields that affect the mirror document. Excludes
|
|
96
96
|
* runtime metadata like referenceSolution (filesystem path) since
|
|
97
97
|
* that's not mirrored.
|
|
98
98
|
*/
|
|
99
|
-
export declare function computeTaskHash(task:
|
|
99
|
+
export declare function computeTaskHash(task: LiteracyTaskDefinition): string;
|
|
100
100
|
/** @internal Exported for testing — not part of the public API. */
|
|
101
|
-
export declare function buildMirrorDocument(task:
|
|
101
|
+
export declare function buildMirrorDocument(task: LiteracyTaskDefinition, opts: {
|
|
102
102
|
contentHash: string;
|
|
103
103
|
docId: string;
|
|
104
104
|
/** Existing author from the current mirror document (write-once preservation) */
|
|
@@ -113,9 +113,9 @@ export declare function buildMirrorDocument(task: TaskDefinition, opts: {
|
|
|
113
113
|
_id: string;
|
|
114
114
|
_type: string;
|
|
115
115
|
ownership: string;
|
|
116
|
-
status: "
|
|
117
|
-
|
|
118
|
-
|
|
116
|
+
status: import("@sanity/ailf-core").TaskStatus;
|
|
117
|
+
assertions: Record<string, unknown>[];
|
|
118
|
+
contextDocs: ({
|
|
119
119
|
_key: string;
|
|
120
120
|
reason: string;
|
|
121
121
|
} | {
|
|
@@ -138,9 +138,9 @@ export declare function buildMirrorDocument(task: TaskDefinition, opts: {
|
|
|
138
138
|
_key: string;
|
|
139
139
|
reason: string;
|
|
140
140
|
})[];
|
|
141
|
-
|
|
141
|
+
title: string;
|
|
142
142
|
docCoverage: boolean;
|
|
143
|
-
|
|
143
|
+
area: {
|
|
144
144
|
_ref: string;
|
|
145
145
|
_type: string;
|
|
146
146
|
};
|
|
@@ -161,5 +161,5 @@ export declare function buildMirrorDocument(task: TaskDefinition, opts: {
|
|
|
161
161
|
author: GitAuthor;
|
|
162
162
|
lastEditor: GitAuthor;
|
|
163
163
|
};
|
|
164
|
-
|
|
164
|
+
promptText: string;
|
|
165
165
|
};
|
|
@@ -26,7 +26,7 @@ import { ConsoleLogger } from "../adapters/loggers/index.js";
|
|
|
26
26
|
* 1. Compute deterministic document ID
|
|
27
27
|
* 2. Compute content hash of the task definition
|
|
28
28
|
* 3. Check if mirror document exists with same hash → skip if unchanged
|
|
29
|
-
* 4. Resolve
|
|
29
|
+
* 4. Resolve context doc slugs → Sanity references
|
|
30
30
|
* 5. Auto-create feature areas if needed
|
|
31
31
|
* 6. Upsert the ailf.task document with origin block
|
|
32
32
|
*/
|
|
@@ -43,10 +43,10 @@ export async function mirrorRepoTasks(options) {
|
|
|
43
43
|
};
|
|
44
44
|
if (tasks.length === 0)
|
|
45
45
|
return result;
|
|
46
|
-
// Batch-resolve all
|
|
46
|
+
// Batch-resolve all context doc slugs (slug refs only — other ref types
|
|
47
47
|
// are stored without a resolved article reference for now)
|
|
48
48
|
const allSlugs = [
|
|
49
|
-
...new Set(tasks.flatMap((t) => t.
|
|
49
|
+
...new Set(tasks.flatMap((t) => (t.context?.docs ?? []).filter(isSlugRef).map((d) => d.slug))),
|
|
50
50
|
];
|
|
51
51
|
const slugToDocId = await batchResolveDocSlugs(client, allSlugs);
|
|
52
52
|
// Track unresolved slugs
|
|
@@ -56,7 +56,7 @@ export async function mirrorRepoTasks(options) {
|
|
|
56
56
|
}
|
|
57
57
|
}
|
|
58
58
|
// Ensure all feature areas exist
|
|
59
|
-
const areas = [...new Set(tasks.map((t) => t.
|
|
59
|
+
const areas = [...new Set(tasks.map((t) => t.area ?? ""))];
|
|
60
60
|
const createdAreas = await ensureFeatureAreas(client, areas, dryRun, log);
|
|
61
61
|
result.areasCreated = createdAreas;
|
|
62
62
|
// Fetch existing mirror document state for change detection + ownership check
|
|
@@ -241,7 +241,7 @@ export function mirrorDocId(owner, repo, taskId) {
|
|
|
241
241
|
// Content hashing
|
|
242
242
|
// ---------------------------------------------------------------------------
|
|
243
243
|
/**
|
|
244
|
-
* Compute a content hash of a
|
|
244
|
+
* Compute a content hash of a LiteracyTaskDefinition for change detection.
|
|
245
245
|
*
|
|
246
246
|
* Includes all fields that affect the mirror document. Excludes
|
|
247
247
|
* runtime metadata like referenceSolution (filesystem path) since
|
|
@@ -250,10 +250,10 @@ export function mirrorDocId(owner, repo, taskId) {
|
|
|
250
250
|
export function computeTaskHash(task) {
|
|
251
251
|
const payload = JSON.stringify({
|
|
252
252
|
id: task.id,
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
253
|
+
title: task.title,
|
|
254
|
+
area: task.area,
|
|
255
|
+
prompt: task.prompt,
|
|
256
|
+
docs: task.context?.docs,
|
|
257
257
|
docCoverage: task.docCoverage,
|
|
258
258
|
assertions: task.assertions,
|
|
259
259
|
baseline: task.baseline,
|
|
@@ -353,10 +353,10 @@ async function fetchExistingDocState(client, docIds) {
|
|
|
353
353
|
/** @internal Exported for testing — not part of the public API. */
|
|
354
354
|
export function buildMirrorDocument(task, opts) {
|
|
355
355
|
const { contentHash, docId, existingAuthor, git, slugToDocId } = opts;
|
|
356
|
-
// Build
|
|
356
|
+
// Build context docs with resolved references and correct refType.
|
|
357
357
|
// Each ref type gets the appropriate resolution fields set on the
|
|
358
358
|
// mirror document so Studio can display them correctly.
|
|
359
|
-
const
|
|
359
|
+
const contextDocs = (task.context?.docs ?? []).map((ref, i) => {
|
|
360
360
|
const base = { _key: `cd${i}`, reason: ref.reason ?? "" };
|
|
361
361
|
if (isSlugRef(ref)) {
|
|
362
362
|
const resolvedId = slugToDocId.get(ref.slug);
|
|
@@ -395,7 +395,7 @@ export function buildMirrorDocument(task, opts) {
|
|
|
395
395
|
return base;
|
|
396
396
|
});
|
|
397
397
|
// Build assertions
|
|
398
|
-
const assertArray = task.assertions.map((a, i) => {
|
|
398
|
+
const assertArray = (task.assertions ?? []).map((a, i) => {
|
|
399
399
|
const entry = {
|
|
400
400
|
_key: `a${i}`,
|
|
401
401
|
type: a.type,
|
|
@@ -420,19 +420,20 @@ export function buildMirrorDocument(task, opts) {
|
|
|
420
420
|
}
|
|
421
421
|
return entry;
|
|
422
422
|
});
|
|
423
|
-
// Determine the source file path (best-effort from task's
|
|
424
|
-
const
|
|
423
|
+
// Determine the source file path (best-effort from task's area)
|
|
424
|
+
const area = task.area ?? "";
|
|
425
|
+
const filePath = `.ailf/tasks/${area}.yaml`;
|
|
425
426
|
return {
|
|
426
427
|
_id: docId,
|
|
427
428
|
_type: "ailf.task",
|
|
428
429
|
ownership: "repo",
|
|
429
430
|
status: task.status ?? "active",
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
docCoverage: task.docCoverage,
|
|
434
|
-
|
|
435
|
-
_ref: `ailf.featureArea.${
|
|
431
|
+
assertions: assertArray,
|
|
432
|
+
contextDocs,
|
|
433
|
+
title: task.title,
|
|
434
|
+
docCoverage: task.docCoverage ?? false,
|
|
435
|
+
area: {
|
|
436
|
+
_ref: `ailf.featureArea.${area}`,
|
|
436
437
|
_type: "reference",
|
|
437
438
|
},
|
|
438
439
|
id: { _type: "slug", current: task.id },
|
|
@@ -451,7 +452,7 @@ export function buildMirrorDocument(task, opts) {
|
|
|
451
452
|
author: existingAuthor ?? git.author,
|
|
452
453
|
lastEditor: git.author,
|
|
453
454
|
},
|
|
454
|
-
|
|
455
|
+
promptText: task.prompt?.text ?? "",
|
|
455
456
|
...(task.baseline
|
|
456
457
|
? {
|
|
457
458
|
baseline: {
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI boundary normalization for evaluation mode names.
|
|
3
|
+
*
|
|
4
|
+
* Legacy CLI users pass variant names like "baseline" or "agentic" as the
|
|
5
|
+
* --mode flag. This module normalizes those to the canonical mode ("literacy")
|
|
6
|
+
* plus a variant field, so downstream pipeline code only ever sees canonical
|
|
7
|
+
* mode names.
|
|
8
|
+
*/
|
|
9
|
+
import { type EvalMode } from "../_vendor/ailf-shared/index.d.ts";
|
|
10
|
+
/**
|
|
11
|
+
* Literacy variant name constants.
|
|
12
|
+
*
|
|
13
|
+
* Production code imports these instead of scattering legacy string literals.
|
|
14
|
+
* Defined here (alongside the normalizer) so all variant name definitions
|
|
15
|
+
* live in one file — the single source of truth for the legacy-to-canonical
|
|
16
|
+
* mapping.
|
|
17
|
+
*/
|
|
18
|
+
export declare const LiteracyVariant: {
|
|
19
|
+
/** Standard with-docs / without-docs evaluation (legacy mode name: "baseline") */
|
|
20
|
+
readonly STANDARD: "baseline";
|
|
21
|
+
/** Agentic evaluation — model uses tools to find docs */
|
|
22
|
+
readonly AGENTIC: "agentic";
|
|
23
|
+
/** Observed mode — HTTP-instrumented behavior observation */
|
|
24
|
+
readonly OBSERVED: "observed";
|
|
25
|
+
/** Full mode — standard + agentic combined */
|
|
26
|
+
readonly FULL: "full";
|
|
27
|
+
};
|
|
28
|
+
/** Union of all literacy variant string values */
|
|
29
|
+
export type LiteracyVariantName = (typeof LiteracyVariant)[keyof typeof LiteracyVariant];
|
|
30
|
+
/**
|
|
31
|
+
* The two literacy evaluation sub-modes that control entry generation.
|
|
32
|
+
* "standard" (baseline) generates gold + floor entries; "agentic" generates
|
|
33
|
+
* gold entries only.
|
|
34
|
+
*/
|
|
35
|
+
export type LiteracyEvalSubMode = typeof LiteracyVariant.STANDARD | typeof LiteracyVariant.AGENTIC;
|
|
36
|
+
export interface NormalizedMode {
|
|
37
|
+
mode: EvalMode;
|
|
38
|
+
variant?: string;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Normalize a raw CLI mode string to a canonical mode + optional variant.
|
|
42
|
+
*
|
|
43
|
+
* Legacy names ("baseline", "agentic", "observed", "full") are mapped to
|
|
44
|
+
* `{ mode: "literacy", variant: "<name>" }` and emit a deprecation warning
|
|
45
|
+
* on stderr. Canonical names pass through unchanged.
|
|
46
|
+
*
|
|
47
|
+
* @throws {Error} If the input is not a recognized mode or variant name.
|
|
48
|
+
*/
|
|
49
|
+
export declare function normalizeMode(input: string): NormalizedMode;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI boundary normalization for evaluation mode names.
|
|
3
|
+
*
|
|
4
|
+
* Legacy CLI users pass variant names like "baseline" or "agentic" as the
|
|
5
|
+
* --mode flag. This module normalizes those to the canonical mode ("literacy")
|
|
6
|
+
* plus a variant field, so downstream pipeline code only ever sees canonical
|
|
7
|
+
* mode names.
|
|
8
|
+
*/
|
|
9
|
+
import { CANONICAL_EVAL_MODES, LEGACY_EVAL_MODE_ALIASES, } from "../_vendor/ailf-shared/index.js";
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Constants (derived from shared package — single source of truth)
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
/** The 5 canonical evaluation modes. */
|
|
14
|
+
const CANONICAL_MODES = new Set(CANONICAL_EVAL_MODES);
|
|
15
|
+
/**
|
|
16
|
+
* Literacy variant name constants.
|
|
17
|
+
*
|
|
18
|
+
* Production code imports these instead of scattering legacy string literals.
|
|
19
|
+
* Defined here (alongside the normalizer) so all variant name definitions
|
|
20
|
+
* live in one file — the single source of truth for the legacy-to-canonical
|
|
21
|
+
* mapping.
|
|
22
|
+
*/
|
|
23
|
+
export const LiteracyVariant = {
|
|
24
|
+
/** Standard with-docs / without-docs evaluation (legacy mode name: "baseline") */
|
|
25
|
+
STANDARD: "baseline",
|
|
26
|
+
/** Agentic evaluation — model uses tools to find docs */
|
|
27
|
+
AGENTIC: "agentic",
|
|
28
|
+
/** Observed mode — HTTP-instrumented behavior observation */
|
|
29
|
+
OBSERVED: "observed",
|
|
30
|
+
/** Full mode — standard + agentic combined */
|
|
31
|
+
FULL: "full",
|
|
32
|
+
};
|
|
33
|
+
/**
|
|
34
|
+
* Legacy CLI names that are really literacy variants, not distinct modes.
|
|
35
|
+
* Each maps to `mode: "literacy"` with the original name as the variant.
|
|
36
|
+
*/
|
|
37
|
+
const LEGACY_LITERACY_VARIANTS = new Set(LEGACY_EVAL_MODE_ALIASES);
|
|
38
|
+
/** Union of all accepted input strings for error messages. */
|
|
39
|
+
const ALL_ACCEPTED = [
|
|
40
|
+
...Array.from(CANONICAL_MODES),
|
|
41
|
+
...Array.from(LEGACY_LITERACY_VARIANTS),
|
|
42
|
+
];
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
// Public API
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
/**
|
|
47
|
+
* Normalize a raw CLI mode string to a canonical mode + optional variant.
|
|
48
|
+
*
|
|
49
|
+
* Legacy names ("baseline", "agentic", "observed", "full") are mapped to
|
|
50
|
+
* `{ mode: "literacy", variant: "<name>" }` and emit a deprecation warning
|
|
51
|
+
* on stderr. Canonical names pass through unchanged.
|
|
52
|
+
*
|
|
53
|
+
* @throws {Error} If the input is not a recognized mode or variant name.
|
|
54
|
+
*/
|
|
55
|
+
export function normalizeMode(input) {
|
|
56
|
+
if (LEGACY_LITERACY_VARIANTS.has(input)) {
|
|
57
|
+
console.warn(`⚠ Deprecated: --mode ${input} is a legacy alias. Use --mode literacy --variant ${input} instead.`);
|
|
58
|
+
return { mode: "literacy", variant: input };
|
|
59
|
+
}
|
|
60
|
+
if (CANONICAL_MODES.has(input)) {
|
|
61
|
+
return { mode: input };
|
|
62
|
+
}
|
|
63
|
+
throw new Error(`Unknown mode "${input}". Valid modes: ${ALL_ACCEPTED.join(", ")}`);
|
|
64
|
+
}
|
package/dist/pipeline/plan.d.ts
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
* @see docs/exec-plans/execution-preview.md
|
|
11
11
|
*/
|
|
12
12
|
import type { DebugOptions, EvalMode } from "./types.js";
|
|
13
|
+
import { LiteracyVariant } from "./normalize-mode.js";
|
|
13
14
|
/** Comparison plan for --compare flag. */
|
|
14
15
|
export interface ComparisonPlan {
|
|
15
16
|
/** Age of the baseline in human-readable form */
|
|
@@ -121,8 +122,8 @@ export interface StepPlan {
|
|
|
121
122
|
export interface TaskPlan {
|
|
122
123
|
/** Test description */
|
|
123
124
|
description: string;
|
|
124
|
-
/** Whether this is a gold (with docs) or baseline (without docs) variant */
|
|
125
|
-
variant:
|
|
125
|
+
/** Whether this is a gold (with docs) or standard/baseline (without docs) variant */
|
|
126
|
+
variant: typeof LiteracyVariant.STANDARD | "gold";
|
|
126
127
|
}
|
|
127
128
|
/** Minimal options shape needed to build a pipeline execution plan. */
|
|
128
129
|
export interface PlanOptions {
|
|
@@ -138,6 +139,8 @@ export interface PlanOptions {
|
|
|
138
139
|
gapAnalysisEnabled: boolean;
|
|
139
140
|
graderReplications?: number;
|
|
140
141
|
mode: EvalMode;
|
|
142
|
+
/** Literacy variant when mode is "literacy" (baseline, agentic, observed, full) */
|
|
143
|
+
variant?: string;
|
|
141
144
|
noCache: boolean;
|
|
142
145
|
publishEnabled: boolean;
|
|
143
146
|
readinessEnabled: boolean;
|