@sanity/ailf 0.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/config/features.ts +23 -0
- package/config/models.ts +95 -0
- package/config/prompts.ts +16 -0
- package/config/rubrics.ts +225 -0
- package/config/schedules.ts +47 -0
- package/config/sinks.ts +37 -0
- package/config/sources.ts +21 -0
- package/config/thresholds.ts +61 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
- package/dist/_vendor/ailf-core/config-helpers.js +170 -0
- package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
- package/dist/_vendor/ailf-core/env-helper.js +45 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/examples/index.js +25 -0
- package/dist/_vendor/ailf-core/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/index.js +5 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
- package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
- package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
- package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
- package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/services/index.js +2 -1
- package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
- package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
- package/dist/_vendor/ailf-core/services/scoring.js +25 -15
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
- package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
- package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
- package/dist/_vendor/ailf-core/types/index.js +8 -1
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
- package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
- package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
- package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
- package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
- package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
- package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
- package/dist/_vendor/ailf-core/types/trace.js +18 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
- package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
- package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
- package/dist/_vendor/ailf-shared/index.d.ts +0 -1
- package/dist/_vendor/ailf-shared/index.js +0 -1
- package/dist/adapters/api-client/build-request.js +14 -13
- package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
- package/dist/adapters/config-sources/file-config-adapter.js +39 -12
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
- package/dist/adapters/config-sources/ts-config-loader.js +141 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
- package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
- package/dist/adapters/task-sources/index.d.ts +3 -2
- package/dist/adapters/task-sources/index.js +3 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
- package/dist/adapters/task-sources/repo-schemas.js +227 -19
- package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
- package/dist/adapters/task-sources/repo-task-source.js +92 -80
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
- package/dist/adapters/task-sources/task-file-loader.js +83 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
- package/dist/adapters/task-sources/yaml-task-source.js +19 -16
- package/dist/cli.js +0 -2
- package/dist/commands/baseline.js +4 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/coverage-audit.js +9 -1
- package/dist/commands/explain-handler.js +25 -23
- package/dist/commands/fetch-docs.js +3 -2
- package/dist/commands/generate-configs.js +1 -1
- package/dist/commands/init.d.ts +6 -4
- package/dist/commands/init.js +302 -23
- package/dist/commands/interactive.js +11 -7
- package/dist/commands/pipeline-action.d.ts +2 -0
- package/dist/commands/pipeline-action.js +16 -6
- package/dist/commands/pipeline.d.ts +1 -0
- package/dist/commands/pipeline.js +4 -2
- package/dist/commands/pr-comment.js +1 -1
- package/dist/commands/publish.js +2 -2
- package/dist/commands/readiness-report.js +13 -6
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +13 -1
- package/dist/composition-root.js +99 -4
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +28 -8
- package/dist/orchestration/steps/calculate-scores-step.js +24 -11
- package/dist/orchestration/steps/fetch-docs-step.js +8 -7
- package/dist/orchestration/steps/gap-analysis-step.js +8 -7
- package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
- package/dist/orchestration/steps/generate-configs-step.js +261 -51
- package/dist/orchestration/steps/grader-consistency-step.js +7 -4
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/readiness-step.js +5 -6
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
- package/dist/orchestration/steps/run-eval-step.js +8 -7
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/cache.js +36 -8
- package/dist/pipeline/calculate-scores.d.ts +2 -4
- package/dist/pipeline/calculate-scores.js +43 -113
- package/dist/pipeline/checks.js +2 -2
- package/dist/pipeline/compare.js +8 -8
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
- package/dist/pipeline/compiler/assertion-mapper.js +175 -0
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
- package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
- package/dist/pipeline/compiler/config-loader.d.ts +56 -0
- package/dist/pipeline/compiler/config-loader.js +111 -0
- package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
- package/dist/pipeline/compiler/fixture-resolver.js +113 -0
- package/dist/pipeline/compiler/hash.d.ts +11 -0
- package/dist/pipeline/compiler/hash.js +18 -0
- package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
- package/dist/pipeline/compiler/ignore-fields.js +113 -0
- package/dist/pipeline/compiler/index.d.ts +29 -0
- package/dist/pipeline/compiler/index.js +45 -0
- package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
- package/dist/pipeline/compiler/literacy-bridge.js +172 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
- package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/index.d.ts +9 -0
- package/dist/pipeline/compiler/presets/index.js +8 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
- package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
- package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
- package/dist/pipeline/compiler/provider-assembler.js +137 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
- package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
- package/dist/pipeline/compiler/sandbox/index.js +11 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
- package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
- package/dist/pipeline/compiler/scoring-bridge.js +114 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
- package/dist/pipeline/compiler/task-graph-builder.js +291 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
- package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
- package/dist/pipeline/compiler/telemetry/index.js +19 -0
- package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
- package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
- package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
- package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
- package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
- package/dist/pipeline/compiler/variable-resolver.js +115 -0
- package/dist/pipeline/coverage-audit.d.ts +15 -5
- package/dist/pipeline/coverage-audit.js +41 -22
- package/dist/pipeline/eval-constants.d.ts +16 -6
- package/dist/pipeline/eval-constants.js +25 -4
- package/dist/pipeline/eval-fingerprint.d.ts +2 -2
- package/dist/pipeline/eval-fingerprint.js +8 -9
- package/dist/pipeline/expand-tasks.d.ts +19 -10
- package/dist/pipeline/expand-tasks.js +34 -28
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +2 -2
- package/dist/pipeline/generate-configs.d.ts +22 -4
- package/dist/pipeline/generate-configs.js +53 -24
- package/dist/pipeline/grader-api.d.ts +3 -3
- package/dist/pipeline/grader-api.js +5 -12
- package/dist/pipeline/grader-compare-runner.js +20 -27
- package/dist/pipeline/grader-comparison.d.ts +4 -8
- package/dist/pipeline/grader-comparison.js +11 -17
- package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
- package/dist/pipeline/grader-consistency-runner.js +16 -20
- package/dist/pipeline/grader-consistency.d.ts +6 -10
- package/dist/pipeline/grader-consistency.js +13 -32
- package/dist/pipeline/grader-sensitivity-runner.js +7 -5
- package/dist/pipeline/grader-sensitivity.d.ts +2 -6
- package/dist/pipeline/grader-sensitivity.js +10 -10
- package/dist/pipeline/grader-validate-runner.js +7 -5
- package/dist/pipeline/grader-validation.d.ts +2 -6
- package/dist/pipeline/grader-validation.js +14 -22
- package/dist/pipeline/map-request-to-config.js +7 -1
- package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
- package/dist/pipeline/mirror-repo-tasks.js +22 -21
- package/dist/pipeline/normalize-mode.d.ts +49 -0
- package/dist/pipeline/normalize-mode.js +64 -0
- package/dist/pipeline/plan.d.ts +5 -2
- package/dist/pipeline/plan.js +134 -78
- package/dist/pipeline/pr-comment.js +2 -0
- package/dist/pipeline/profile-resolution.d.ts +22 -14
- package/dist/pipeline/profile-resolution.js +41 -19
- package/dist/pipeline/provenance.d.ts +2 -2
- package/dist/pipeline/provenance.js +12 -17
- package/dist/pipeline/release-report.js +4 -4
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/rubric-loader.d.ts +20 -0
- package/dist/pipeline/rubric-loader.js +37 -0
- package/dist/pipeline/validate.d.ts +4 -4
- package/dist/pipeline/validate.js +64 -53
- package/dist/schedules/loader.js +18 -8
- package/dist/scripts/migrate-task-mode.d.ts +24 -0
- package/dist/scripts/migrate-task-mode.js +85 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +15 -15
- package/dist/sinks/loader.js +5 -7
- package/dist/sources.d.ts +7 -7
- package/dist/sources.js +22 -24
- package/dist/webhook/dispatch.js +2 -1
- package/package.json +15 -4
- package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
- package/tasks/literacy/frameworks.task.ts +128 -0
- package/tasks/literacy/functions.task.ts +69 -0
- package/tasks/literacy/groq.task.ts +258 -0
- package/tasks/literacy/nextjs-live.task.ts +75 -0
- package/tasks/literacy/studio-setup.task.ts +131 -0
- package/tasks/literacy/visual-editing.task.ts +146 -0
- package/config/features.yaml +0 -116
- package/config/models.yaml +0 -116
- package/config/prompts.yaml +0 -75
- package/config/rubrics.yaml +0 -81
- package/config/schedules.yaml +0 -43
- package/config/sinks.yaml +0 -54
- package/config/sources.yaml +0 -51
- package/config/thresholds.yaml +0 -49
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
* Fields marked optional are transitional — they will become required
|
|
12
12
|
* as downstream consumers are converted to use them.
|
|
13
13
|
*/
|
|
14
|
-
import type { DebugOptions, EvalMode } from "../types/index.js";
|
|
14
|
+
import type { DebugOptions, EvalMode, PluginRegistry } from "../types/index.js";
|
|
15
15
|
import type { CacheStore } from "./cache-store.js";
|
|
16
16
|
import type { DocFetcher } from "./doc-fetcher.js";
|
|
17
17
|
import type { EvalRunner } from "./eval-runner.js";
|
|
@@ -27,8 +27,19 @@ import type { TaskSource } from "./task-source.js";
|
|
|
27
27
|
export interface ResolvedConfig {
|
|
28
28
|
/** Eval package root directory */
|
|
29
29
|
rootDir: string;
|
|
30
|
-
/** Evaluation mode */
|
|
30
|
+
/** Evaluation mode — canonical name (e.g., "literacy", "knowledge-probe") */
|
|
31
31
|
mode: EvalMode;
|
|
32
|
+
/**
|
|
33
|
+
* Literacy variant — only meaningful when mode is "literacy".
|
|
34
|
+
*
|
|
35
|
+
* When a user passes `--mode baseline`, the CLI normalizes this to
|
|
36
|
+
* `mode: "literacy", variant: "baseline"`. This keeps the pipeline
|
|
37
|
+
* mode-agnostic while preserving literacy's multi-variant behavior.
|
|
38
|
+
*
|
|
39
|
+
* Values: "baseline" | "agentic" | "observed" | "full" | undefined
|
|
40
|
+
* Undefined means "use the default variant for the mode" (baseline for literacy).
|
|
41
|
+
*/
|
|
42
|
+
variant?: string;
|
|
32
43
|
/** Debug options */
|
|
33
44
|
debug?: DebugOptions;
|
|
34
45
|
/** Feature area filter */
|
|
@@ -129,6 +140,8 @@ export interface ResolvedConfig {
|
|
|
129
140
|
apiUrl: string;
|
|
130
141
|
/** AILF API key (from AILF_API_KEY env var) */
|
|
131
142
|
apiKey?: string;
|
|
143
|
+
/** External preset file paths or npm package names to load */
|
|
144
|
+
presets?: string[];
|
|
132
145
|
}
|
|
133
146
|
/**
|
|
134
147
|
* Application context — the complete dependency carrier.
|
|
@@ -153,6 +166,8 @@ export interface AppContext {
|
|
|
153
166
|
readonly evalRunner: EvalRunner;
|
|
154
167
|
/** Structured logger */
|
|
155
168
|
readonly logger: Logger;
|
|
169
|
+
/** Plugin registry — mode handlers, assertions, rubric templates, etc. */
|
|
170
|
+
readonly registry: PluginRegistry;
|
|
156
171
|
/**
|
|
157
172
|
* Persistent report store (Sanity Content Lake).
|
|
158
173
|
* Optional — not all commands need it. Commands that publish or
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* The pipeline orchestrator and all downstream steps work with
|
|
10
10
|
* FetchResult regardless of where the documentation came from.
|
|
11
11
|
*/
|
|
12
|
-
import type {
|
|
12
|
+
import type { GeneralizedTaskDefinition } from "../types/generalized-task.js";
|
|
13
13
|
/**
|
|
14
14
|
* A fetched documentation context ready for injection into prompts.
|
|
15
15
|
*
|
|
@@ -127,5 +127,5 @@ export interface DocFetcher {
|
|
|
127
127
|
* @param source — Where to fetch documentation from
|
|
128
128
|
* @returns Fetched doc contexts + optional metadata
|
|
129
129
|
*/
|
|
130
|
-
fetch(tasks:
|
|
130
|
+
fetch(tasks: GeneralizedTaskDefinition[], source?: DocSourceConfig): Promise<FetchResult>;
|
|
131
131
|
}
|
|
@@ -9,7 +9,8 @@ export type { ConfigSource } from "./config-source.js";
|
|
|
9
9
|
export type { AppContext, ReportSinkPort, ReportStorePort, ResolvedConfig, } from "./context.js";
|
|
10
10
|
export type { DocContext, DocFetcher, DocSourceConfig, DocumentManifestEntry, DocumentOverlaySummary, FetchMetadata, FetchResult, ReleaseImpact, UrlFetchEntry, UrlFetchSummary, } from "./doc-fetcher.js";
|
|
11
11
|
export type { EvalRunConfig, EvalRunner } from "./eval-runner.js";
|
|
12
|
+
export type { CompilationContext, CompileResultAssertion, CompileResultPrompt, CompileResultProvider, CompileResultTestCase, ModeCompileResult, ModeHandler, ModeProviderEntry, ModeRubricConfig, PromptTemplate, } from "./mode-handler.js";
|
|
12
13
|
export type { Logger } from "./logger.js";
|
|
13
14
|
export type { PipelineStep } from "./pipeline-step.js";
|
|
14
|
-
export type {
|
|
15
|
+
export type { TaskSource } from "./task-source.js";
|
|
15
16
|
export { canonicalDocRefLabel, isIdRef, isPathRef, isPerspectiveRef, isSlugRef, isTemplatedAssertion, } from "./task-source.js";
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ModeHandler — the common interface every evaluation mode implements.
|
|
3
|
+
*
|
|
4
|
+
* The pipeline dispatches to mode handlers through the PluginRegistry:
|
|
5
|
+
* 1. Look up the mode: `ctx.registry.getMode(mode)`
|
|
6
|
+
* 2. Import the handler module: `import(registration.handlerModule)`
|
|
7
|
+
* 3. Call: `module.handler.compileTask(task, ctx)`
|
|
8
|
+
*
|
|
9
|
+
* Each handler file exports a `handler` object conforming to this interface.
|
|
10
|
+
* The handler narrows the GeneralizedTaskDefinition to its mode-specific
|
|
11
|
+
* variant and produces a ModeCompileResult.
|
|
12
|
+
*
|
|
13
|
+
* Types here are minimal structural contracts — the eval package's Promptfoo
|
|
14
|
+
* types satisfy them via TypeScript structural compatibility.
|
|
15
|
+
*
|
|
16
|
+
* @see docs/design-docs/architecture-overhaul/extensibility-plugins.md
|
|
17
|
+
* @see packages/eval/src/pipeline/compiler/mode-handlers/
|
|
18
|
+
*/
|
|
19
|
+
import type { GeneralizedTaskDefinition } from "../types/generalized-task.js";
|
|
20
|
+
/**
|
|
21
|
+
* A prompt template owned by a mode handler.
|
|
22
|
+
*
|
|
23
|
+
* Mode handlers can return these via `getPrompts()` to override the global
|
|
24
|
+
* config/prompts.ts templates. This lets non-literacy modes define their
|
|
25
|
+
* own prompt structures without polluting the global config.
|
|
26
|
+
*/
|
|
27
|
+
export interface PromptTemplate {
|
|
28
|
+
/** Unique identifier (e.g. "with-docs", "agentic") */
|
|
29
|
+
id: string;
|
|
30
|
+
/** Human-readable label for display */
|
|
31
|
+
label: string;
|
|
32
|
+
/** The prompt template string with {{variable}} placeholders */
|
|
33
|
+
template: string;
|
|
34
|
+
/** Variable names used by this template — for documentation only */
|
|
35
|
+
variables?: string[];
|
|
36
|
+
}
|
|
37
|
+
/** Compilation context — shared state the pipeline provides to every handler */
|
|
38
|
+
export interface CompilationContext {
|
|
39
|
+
/** Eval package root directory (for resolving file paths) */
|
|
40
|
+
rootDir: string;
|
|
41
|
+
/** Grader provider ID for LLM-graded assertions */
|
|
42
|
+
graderProvider?: string;
|
|
43
|
+
/** Model providers to include in the evaluation */
|
|
44
|
+
models?: ModeProviderEntry[];
|
|
45
|
+
/** Rubric config (templates, weights) — loaded from config/rubrics */
|
|
46
|
+
rubricConfig?: ModeRubricConfig;
|
|
47
|
+
}
|
|
48
|
+
/** A model provider entry for compilation */
|
|
49
|
+
export interface ModeProviderEntry {
|
|
50
|
+
id: string;
|
|
51
|
+
label: string;
|
|
52
|
+
config?: Record<string, unknown>;
|
|
53
|
+
}
|
|
54
|
+
/** Minimal rubric config needed by mode handlers */
|
|
55
|
+
export interface ModeRubricConfig {
|
|
56
|
+
templates: Record<string, {
|
|
57
|
+
dimension?: string;
|
|
58
|
+
header: string;
|
|
59
|
+
scale: string[];
|
|
60
|
+
criteria_label?: string;
|
|
61
|
+
}>;
|
|
62
|
+
}
|
|
63
|
+
/** A provider entry in the compile result */
|
|
64
|
+
export interface CompileResultProvider {
|
|
65
|
+
id: string;
|
|
66
|
+
label?: string;
|
|
67
|
+
config?: Record<string, unknown>;
|
|
68
|
+
}
|
|
69
|
+
/** A prompt entry in the compile result */
|
|
70
|
+
export interface CompileResultPrompt {
|
|
71
|
+
id: string;
|
|
72
|
+
label: string;
|
|
73
|
+
raw: string;
|
|
74
|
+
}
|
|
75
|
+
/** A test case entry in the compile result */
|
|
76
|
+
export interface CompileResultTestCase {
|
|
77
|
+
description: string;
|
|
78
|
+
vars: Record<string, unknown>;
|
|
79
|
+
assert?: CompileResultAssertion[];
|
|
80
|
+
prompts?: string[];
|
|
81
|
+
}
|
|
82
|
+
/** An assertion entry in a test case */
|
|
83
|
+
export interface CompileResultAssertion {
|
|
84
|
+
type: string;
|
|
85
|
+
value?: unknown;
|
|
86
|
+
weight?: number;
|
|
87
|
+
provider?: string;
|
|
88
|
+
metadata?: Record<string, unknown>;
|
|
89
|
+
}
|
|
90
|
+
/**
|
|
91
|
+
* ModeCompileResult — the common output every mode handler produces.
|
|
92
|
+
*
|
|
93
|
+
* All four fields (providers, tests, prompts, warnings) are present in
|
|
94
|
+
* every handler's result type. Mode-specific extras (sandbox config,
|
|
95
|
+
* metadata, extensions) go in the `extras` bag.
|
|
96
|
+
*/
|
|
97
|
+
export interface ModeCompileResult {
|
|
98
|
+
/** Provider configurations for Promptfoo */
|
|
99
|
+
providers: CompileResultProvider[];
|
|
100
|
+
/** Compiled test cases */
|
|
101
|
+
tests: CompileResultTestCase[];
|
|
102
|
+
/** Prompt templates */
|
|
103
|
+
prompts: CompileResultPrompt[];
|
|
104
|
+
/** Warnings generated during compilation */
|
|
105
|
+
warnings: string[];
|
|
106
|
+
/** Mode-specific extras (extensions, sandboxConfig, metadata, etc.) */
|
|
107
|
+
extras?: Record<string, unknown>;
|
|
108
|
+
}
|
|
109
|
+
/**
|
|
110
|
+
* ModeHandler — the interface every evaluation mode handler exports.
|
|
111
|
+
*
|
|
112
|
+
* Handler modules are referenced by `ModeRegistration.handlerModule` in the
|
|
113
|
+
* plugin registry. The pipeline imports the module and calls `handler.compileTask()`.
|
|
114
|
+
*
|
|
115
|
+
* Each handler file should export:
|
|
116
|
+
* export const handler: ModeHandler = { ... }
|
|
117
|
+
*/
|
|
118
|
+
export interface ModeHandler {
|
|
119
|
+
/** Compile a task definition into evaluation configuration */
|
|
120
|
+
compileTask(task: GeneralizedTaskDefinition, ctx: CompilationContext): ModeCompileResult;
|
|
121
|
+
/**
|
|
122
|
+
* Return prompt templates owned by this mode.
|
|
123
|
+
*
|
|
124
|
+
* When defined, the compiler uses these instead of global config/prompts.ts.
|
|
125
|
+
* Keys are prompt IDs (e.g. "with-docs", "agentic"). Returning undefined
|
|
126
|
+
* or omitting the method falls back to global prompts.
|
|
127
|
+
*/
|
|
128
|
+
getPrompts?(): Record<string, PromptTemplate>;
|
|
129
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ModeHandler — the common interface every evaluation mode implements.
|
|
3
|
+
*
|
|
4
|
+
* The pipeline dispatches to mode handlers through the PluginRegistry:
|
|
5
|
+
* 1. Look up the mode: `ctx.registry.getMode(mode)`
|
|
6
|
+
* 2. Import the handler module: `import(registration.handlerModule)`
|
|
7
|
+
* 3. Call: `module.handler.compileTask(task, ctx)`
|
|
8
|
+
*
|
|
9
|
+
* Each handler file exports a `handler` object conforming to this interface.
|
|
10
|
+
* The handler narrows the GeneralizedTaskDefinition to its mode-specific
|
|
11
|
+
* variant and produces a ModeCompileResult.
|
|
12
|
+
*
|
|
13
|
+
* Types here are minimal structural contracts — the eval package's Promptfoo
|
|
14
|
+
* types satisfy them via TypeScript structural compatibility.
|
|
15
|
+
*
|
|
16
|
+
* @see docs/design-docs/architecture-overhaul/extensibility-plugins.md
|
|
17
|
+
* @see packages/eval/src/pipeline/compiler/mode-handlers/
|
|
18
|
+
*/
|
|
19
|
+
export {};
|
|
@@ -7,150 +7,44 @@
|
|
|
7
7
|
* - RepoTaskSource (tasks-as-content Phase 4) — reads .ailf/tasks/
|
|
8
8
|
*
|
|
9
9
|
* The key invariant: the pipeline orchestrator and all downstream steps
|
|
10
|
-
* work with
|
|
10
|
+
* work with GeneralizedTaskDefinition[] regardless of where they came from.
|
|
11
11
|
*/
|
|
12
|
+
import type { GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, IdDocRef, PathDocRef, PerspectiveDocRef, SlugDocRef } from "../types/generalized-task.js";
|
|
12
13
|
import type { FilterOptions } from "../types/index.js";
|
|
13
|
-
/**
|
|
14
|
-
export interface TemplatedAssertion {
|
|
15
|
-
type: "llm-rubric";
|
|
16
|
-
template: string;
|
|
17
|
-
criteria: string[];
|
|
18
|
-
weight?: number;
|
|
19
|
-
}
|
|
20
|
-
/** A value-based assertion (contains, javascript, etc.) */
|
|
21
|
-
export interface ValueAssertion {
|
|
22
|
-
type: string;
|
|
23
|
-
value?: unknown;
|
|
24
|
-
weight?: number;
|
|
25
|
-
[key: string]: unknown;
|
|
26
|
-
}
|
|
27
|
-
/** Any assertion definition — either templated or value-based */
|
|
28
|
-
export type AssertionDefinition = TemplatedAssertion | ValueAssertion;
|
|
29
|
-
/** Baseline variant configuration */
|
|
30
|
-
export interface BaselineConfig {
|
|
31
|
-
/** Whether to generate a baseline variant. Default: true */
|
|
32
|
-
enabled?: boolean;
|
|
33
|
-
/** Rubric mode for baseline. Default: "full" */
|
|
34
|
-
rubric?: "abbreviated" | "full" | "none";
|
|
35
|
-
}
|
|
36
|
-
/**
|
|
37
|
-
* A canonical documentation reference. Each entry resolves docs through
|
|
38
|
-
* one of four strategies, discriminated by key presence (no explicit
|
|
39
|
-
* `type` field). All strategies carry an optional `reason` for context.
|
|
40
|
-
*
|
|
41
|
-
* Strategies:
|
|
42
|
-
* - `slug` — one article by slug field (legacy, may not be unique)
|
|
43
|
-
* - `path` — one article by URL path (unique across sections)
|
|
44
|
-
* - `id` — one document by Sanity `_id` (drafts, imports)
|
|
45
|
-
* - `perspective` — all articles in a content release (one-to-many)
|
|
46
|
-
*
|
|
47
|
-
* @see docs/design-docs/canonical-doc-resolution.md
|
|
48
|
-
*/
|
|
49
|
-
export type CanonicalDocRef = SlugDocRef | PathDocRef | IdDocRef | PerspectiveDocRef;
|
|
50
|
-
/** Resolve by article slug field. Legacy — prefer `path` for uniqueness. */
|
|
51
|
-
export interface SlugDocRef {
|
|
52
|
-
slug: string;
|
|
53
|
-
reason?: string;
|
|
54
|
-
}
|
|
55
|
-
/** Resolve by URL path (after /docs/). Unique across sections. */
|
|
56
|
-
export interface PathDocRef {
|
|
57
|
-
path: string;
|
|
58
|
-
reason?: string;
|
|
59
|
-
}
|
|
60
|
-
/** Resolve by Sanity document `_id`. The primary resolution strategy.
|
|
61
|
-
*
|
|
62
|
-
* Optional `slug` and `path` provide human-readable context — they are
|
|
63
|
-
* NOT used for resolution (the `_id` is authoritative) but help YAML
|
|
64
|
-
* authors understand which document is being referenced. The Content Lake
|
|
65
|
-
* adapter populates them from the dereferenced article.
|
|
66
|
-
*/
|
|
67
|
-
export interface IdDocRef {
|
|
68
|
-
id: string;
|
|
69
|
-
reason?: string;
|
|
70
|
-
/** Human-readable slug (informational only — not used for resolution) */
|
|
71
|
-
slug?: string;
|
|
72
|
-
/** Human-readable path (informational only — not used for resolution) */
|
|
73
|
-
path?: string;
|
|
74
|
-
}
|
|
75
|
-
/** Resolve all articles in a content release. One-to-many. */
|
|
76
|
-
export interface PerspectiveDocRef {
|
|
77
|
-
perspective: string;
|
|
78
|
-
reason?: string;
|
|
79
|
-
}
|
|
80
|
-
/**
|
|
81
|
-
* A loaded, validated task definition ready for expansion.
|
|
82
|
-
*
|
|
83
|
-
* This is the canonical intermediate representation — adapters produce
|
|
84
|
-
* this from YAML, Content Lake, or .ailf/ files. Downstream consumers
|
|
85
|
-
* (expansion, doc fetching, validation) work exclusively with this type.
|
|
86
|
-
*
|
|
87
|
-
* Design notes:
|
|
88
|
-
* - `taskPrompt` is extracted from `vars.task` in YAML format
|
|
89
|
-
* - `docsPath` is NOT included — it's an infrastructure detail derived
|
|
90
|
-
* from convention (`file://contexts/canonical/${id}.md`)
|
|
91
|
-
* - `featureArea` is derived by the adapter (filename stem, document
|
|
92
|
-
* field, directory structure — depends on the source)
|
|
93
|
-
*/
|
|
94
|
-
export interface TaskDefinition {
|
|
95
|
-
/** Unique task identifier */
|
|
96
|
-
id: string;
|
|
97
|
-
/** Human-readable description */
|
|
98
|
-
description: string;
|
|
99
|
-
/** Feature area this task belongs to */
|
|
100
|
-
featureArea: string;
|
|
101
|
-
/** The implementation task prompt (the user-facing request) */
|
|
102
|
-
taskPrompt: string;
|
|
103
|
-
/** Canonical doc references with reasons */
|
|
104
|
-
canonicalDocs: CanonicalDocRef[];
|
|
105
|
-
/** Path to the reference solution (relative to eval package root) */
|
|
106
|
-
referenceSolution: string;
|
|
107
|
-
/** Whether doc coverage rubric should be auto-generated */
|
|
108
|
-
docCoverage: boolean;
|
|
109
|
-
/** Assertion definitions (rubric templates + value assertions) */
|
|
110
|
-
assertions: AssertionDefinition[];
|
|
111
|
-
/** Baseline variant configuration */
|
|
112
|
-
baseline?: BaselineConfig;
|
|
113
|
-
/** Additional template variables beyond task (e.g., custom vars) */
|
|
114
|
-
extraVars?: Record<string, unknown>;
|
|
115
|
-
/** Lifecycle status — controls pipeline inclusion. Absent = "active". */
|
|
116
|
-
status?: "active" | "archived" | "draft" | "paused";
|
|
117
|
-
/** Freeform labels for filtering and organization */
|
|
118
|
-
tags?: string[];
|
|
119
|
-
}
|
|
120
|
-
/** Check if a canonical doc ref resolves by slug.
|
|
14
|
+
/** Check if a doc ref resolves by slug.
|
|
121
15
|
*
|
|
122
16
|
* Excludes IdDocRef (which may carry an optional `slug` for display).
|
|
123
17
|
* When both `id` and `slug` are present, it's an IdDocRef, not a SlugDocRef.
|
|
124
18
|
*/
|
|
125
|
-
export declare function isSlugRef(ref:
|
|
126
|
-
/** Check if a
|
|
19
|
+
export declare function isSlugRef(ref: GeneralizedDocRef): ref is SlugDocRef;
|
|
20
|
+
/** Check if a doc ref resolves by path.
|
|
127
21
|
*
|
|
128
22
|
* Excludes IdDocRef (which may carry an optional `path` for display).
|
|
129
23
|
* When both `id` and `path` are present, it's an IdDocRef, not a PathDocRef.
|
|
130
24
|
*/
|
|
131
|
-
export declare function isPathRef(ref:
|
|
132
|
-
/** Check if a
|
|
25
|
+
export declare function isPathRef(ref: GeneralizedDocRef): ref is PathDocRef;
|
|
26
|
+
/** Check if a doc ref resolves by document ID.
|
|
133
27
|
*
|
|
134
28
|
* Uses `"id" in ref` as the primary discriminator. IdDocRef may also carry
|
|
135
29
|
* optional `slug` and `path` for display purposes, so we cannot exclude
|
|
136
30
|
* on those keys. When both `id` and `slug` are present, `id` wins.
|
|
137
31
|
*/
|
|
138
|
-
export declare function isIdRef(ref:
|
|
139
|
-
/** Check if a
|
|
140
|
-
export declare function isPerspectiveRef(ref:
|
|
32
|
+
export declare function isIdRef(ref: GeneralizedDocRef): ref is IdDocRef;
|
|
33
|
+
/** Check if a doc ref resolves by content release perspective */
|
|
34
|
+
export declare function isPerspectiveRef(ref: GeneralizedDocRef): ref is PerspectiveDocRef;
|
|
141
35
|
/**
|
|
142
|
-
* Extract a display identifier from any
|
|
36
|
+
* Extract a display identifier from any doc ref.
|
|
143
37
|
* Useful for logging, error messages, and retrieval metrics.
|
|
144
38
|
*/
|
|
145
|
-
export declare function canonicalDocRefLabel(ref:
|
|
39
|
+
export declare function canonicalDocRefLabel(ref: GeneralizedDocRef): string;
|
|
146
40
|
/** Check if an assertion uses the templated format (template + criteria) */
|
|
147
|
-
export declare function isTemplatedAssertion(entry:
|
|
41
|
+
export declare function isTemplatedAssertion(entry: GeneralizedAssertionDefinition): entry is GeneralizedTemplatedAssertion;
|
|
148
42
|
/**
|
|
149
43
|
* Port: Where task definitions come from.
|
|
150
44
|
*
|
|
151
45
|
* The pipeline never knows HOW tasks are loaded — it only sees
|
|
152
|
-
*
|
|
153
|
-
* filesystem scanning, etc.
|
|
46
|
+
* GeneralizedTaskDefinition[]. The adapter handles YAML parsing, GROQ
|
|
47
|
+
* queries, filesystem scanning, etc.
|
|
154
48
|
*/
|
|
155
49
|
export interface TaskSource {
|
|
156
50
|
/**
|
|
@@ -159,5 +53,5 @@ export interface TaskSource {
|
|
|
159
53
|
* @param filter — Area, task ID, or changed-doc filters
|
|
160
54
|
* @returns Validated task definitions ready for expansion
|
|
161
55
|
*/
|
|
162
|
-
loadTasks(filter?: FilterOptions): Promise<
|
|
56
|
+
loadTasks(filter?: FilterOptions): Promise<GeneralizedTaskDefinition[]>;
|
|
163
57
|
}
|
|
@@ -7,12 +7,12 @@
|
|
|
7
7
|
* - RepoTaskSource (tasks-as-content Phase 4) — reads .ailf/tasks/
|
|
8
8
|
*
|
|
9
9
|
* The key invariant: the pipeline orchestrator and all downstream steps
|
|
10
|
-
* work with
|
|
10
|
+
* work with GeneralizedTaskDefinition[] regardless of where they came from.
|
|
11
11
|
*/
|
|
12
12
|
// ---------------------------------------------------------------------------
|
|
13
|
-
// Type guards —
|
|
13
|
+
// Type guards — doc refs
|
|
14
14
|
// ---------------------------------------------------------------------------
|
|
15
|
-
/** Check if a
|
|
15
|
+
/** Check if a doc ref resolves by slug.
|
|
16
16
|
*
|
|
17
17
|
* Excludes IdDocRef (which may carry an optional `slug` for display).
|
|
18
18
|
* When both `id` and `slug` are present, it's an IdDocRef, not a SlugDocRef.
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
export function isSlugRef(ref) {
|
|
21
21
|
return "slug" in ref && !("id" in ref);
|
|
22
22
|
}
|
|
23
|
-
/** Check if a
|
|
23
|
+
/** Check if a doc ref resolves by path.
|
|
24
24
|
*
|
|
25
25
|
* Excludes IdDocRef (which may carry an optional `path` for display).
|
|
26
26
|
* When both `id` and `path` are present, it's an IdDocRef, not a PathDocRef.
|
|
@@ -28,7 +28,7 @@ export function isSlugRef(ref) {
|
|
|
28
28
|
export function isPathRef(ref) {
|
|
29
29
|
return "path" in ref && !("id" in ref);
|
|
30
30
|
}
|
|
31
|
-
/** Check if a
|
|
31
|
+
/** Check if a doc ref resolves by document ID.
|
|
32
32
|
*
|
|
33
33
|
* Uses `"id" in ref` as the primary discriminator. IdDocRef may also carry
|
|
34
34
|
* optional `slug` and `path` for display purposes, so we cannot exclude
|
|
@@ -37,12 +37,12 @@ export function isPathRef(ref) {
|
|
|
37
37
|
export function isIdRef(ref) {
|
|
38
38
|
return "id" in ref;
|
|
39
39
|
}
|
|
40
|
-
/** Check if a
|
|
40
|
+
/** Check if a doc ref resolves by content release perspective */
|
|
41
41
|
export function isPerspectiveRef(ref) {
|
|
42
42
|
return "perspective" in ref;
|
|
43
43
|
}
|
|
44
44
|
/**
|
|
45
|
-
* Extract a display identifier from any
|
|
45
|
+
* Extract a display identifier from any doc ref.
|
|
46
46
|
* Useful for logging, error messages, and retrieval metrics.
|
|
47
47
|
*/
|
|
48
48
|
export function canonicalDocRefLabel(ref) {
|
|
@@ -29,10 +29,15 @@ export declare const EvalConfigSchema: z.ZodObject<{
|
|
|
29
29
|
graderReplications: z.ZodOptional<z.ZodNumber>;
|
|
30
30
|
headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
31
31
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
32
|
-
|
|
32
|
+
custom: "custom";
|
|
33
|
+
literacy: "literacy";
|
|
34
|
+
"mcp-server": "mcp-server";
|
|
35
|
+
"agent-harness": "agent-harness";
|
|
36
|
+
"knowledge-probe": "knowledge-probe";
|
|
33
37
|
baseline: "baseline";
|
|
34
|
-
|
|
38
|
+
agentic: "agentic";
|
|
35
39
|
observed: "observed";
|
|
40
|
+
full: "full";
|
|
36
41
|
}>>;
|
|
37
42
|
noAutoScope: z.ZodOptional<z.ZodBoolean>;
|
|
38
43
|
noCache: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -52,5 +57,6 @@ export declare const EvalConfigSchema: z.ZodObject<{
|
|
|
52
57
|
source: z.ZodOptional<z.ZodString>;
|
|
53
58
|
tasks: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
54
59
|
urls: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
60
|
+
presets: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
55
61
|
}, z.core.$strict>;
|
|
56
62
|
export type EvalConfig = z.infer<typeof EvalConfigSchema>;
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
* (studio-eval-config) so Content Lake documents validate identically.
|
|
11
11
|
*/
|
|
12
12
|
import { z } from "zod";
|
|
13
|
+
import { RAW_EVAL_MODES } from "../../ailf-shared/index.js";
|
|
13
14
|
export const EvalConfigSchema = z
|
|
14
15
|
.object({
|
|
15
16
|
/** Allowed origins for agentic mode */
|
|
@@ -46,8 +47,12 @@ export const EvalConfigSchema = z
|
|
|
46
47
|
graderReplications: z.number().int().positive().optional(),
|
|
47
48
|
/** Custom headers for doc fetching */
|
|
48
49
|
headers: z.record(z.string(), z.string()).optional(),
|
|
49
|
-
/**
|
|
50
|
-
|
|
50
|
+
/**
|
|
51
|
+
* Evaluation mode — accepts both canonical and legacy names.
|
|
52
|
+
* Legacy names ("baseline", "agentic", "observed", "full") must pass
|
|
53
|
+
* through normalizeMode() before entering typed pipeline code.
|
|
54
|
+
*/
|
|
55
|
+
mode: z.enum(RAW_EVAL_MODES).optional(),
|
|
51
56
|
/** Disable release-aware auto-scoping */
|
|
52
57
|
noAutoScope: z.boolean().optional(),
|
|
53
58
|
/** Disable local cache */
|
|
@@ -76,5 +81,15 @@ export const EvalConfigSchema = z
|
|
|
76
81
|
tasks: z.array(z.string()).optional(),
|
|
77
82
|
/** Doc source URL overrides */
|
|
78
83
|
urls: z.array(z.string().url()).optional(),
|
|
84
|
+
/**
|
|
85
|
+
* External presets to load — file paths or npm package names.
|
|
86
|
+
*
|
|
87
|
+
* Each entry is resolved as:
|
|
88
|
+
* - Relative path (./foo or ../foo): loaded from disk via jiti
|
|
89
|
+
* - Package name: resolved via Node require
|
|
90
|
+
*
|
|
91
|
+
* Presets are registered in order after built-in presets.
|
|
92
|
+
*/
|
|
93
|
+
presets: z.array(z.string()).optional(),
|
|
79
94
|
})
|
|
80
95
|
.strict();
|
|
@@ -49,10 +49,15 @@ export declare const PipelineRequestSchema: z.ZodObject<{
|
|
|
49
49
|
inlineTasks: z.ZodOptional<z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
|
|
50
50
|
jobId: z.ZodOptional<z.ZodString>;
|
|
51
51
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
52
|
-
|
|
52
|
+
custom: "custom";
|
|
53
|
+
literacy: "literacy";
|
|
54
|
+
"mcp-server": "mcp-server";
|
|
55
|
+
"agent-harness": "agent-harness";
|
|
56
|
+
"knowledge-probe": "knowledge-probe";
|
|
53
57
|
baseline: "baseline";
|
|
54
|
-
|
|
58
|
+
agentic: "agentic";
|
|
55
59
|
observed: "observed";
|
|
60
|
+
full: "full";
|
|
56
61
|
}>>;
|
|
57
62
|
noAutoScope: z.ZodOptional<z.ZodBoolean>;
|
|
58
63
|
noCache: z.ZodOptional<z.ZodBoolean>;
|
|
@@ -70,12 +75,13 @@ export declare const PipelineRequestSchema: z.ZodObject<{
|
|
|
70
75
|
source: z.ZodOptional<z.ZodString>;
|
|
71
76
|
sourceReportId: z.ZodOptional<z.ZodString>;
|
|
72
77
|
taskMode: z.ZodOptional<z.ZodEnum<{
|
|
78
|
+
inline: "inline";
|
|
73
79
|
"content-lake": "content-lake";
|
|
74
80
|
yaml: "yaml";
|
|
75
|
-
inline: "inline";
|
|
76
81
|
}>>;
|
|
77
82
|
tasks: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
78
83
|
urls: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
84
|
+
presets: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
79
85
|
}, z.core.$strip>;
|
|
80
86
|
/** Inferred TypeScript type for a pipeline request payload. */
|
|
81
87
|
export type PipelineRequest = z.infer<typeof PipelineRequestSchema>;
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
* @see packages/eval/src/pipeline/map-request-to-config.ts — maps to ResolvedConfig
|
|
14
14
|
*/
|
|
15
15
|
import { z } from "zod";
|
|
16
|
+
import { RAW_EVAL_MODES } from "../../ailf-shared/index.js";
|
|
16
17
|
// ---------------------------------------------------------------------------
|
|
17
18
|
// Debug options — boolean shorthand or structured object
|
|
18
19
|
// ---------------------------------------------------------------------------
|
|
@@ -69,7 +70,11 @@ export const PipelineRequestSchema = z.object({
|
|
|
69
70
|
headers: z.record(z.string(), z.string()).optional(),
|
|
70
71
|
inlineTasks: z.array(z.record(z.string(), z.unknown())).optional(),
|
|
71
72
|
jobId: z.string().optional(),
|
|
72
|
-
|
|
73
|
+
/**
|
|
74
|
+
* Evaluation mode — accepts both canonical and legacy names.
|
|
75
|
+
* Legacy names must pass through normalizeMode() before entering typed pipeline code.
|
|
76
|
+
*/
|
|
77
|
+
mode: z.enum(RAW_EVAL_MODES).optional(),
|
|
73
78
|
noAutoScope: z.boolean().optional(),
|
|
74
79
|
noCache: z.boolean().optional(),
|
|
75
80
|
noRemoteCache: z.boolean().optional(),
|
|
@@ -84,4 +89,6 @@ export const PipelineRequestSchema = z.object({
|
|
|
84
89
|
taskMode: z.enum(["content-lake", "yaml", "inline"]).optional(),
|
|
85
90
|
tasks: z.array(z.string()).optional(),
|
|
86
91
|
urls: z.array(z.string().url()).optional(),
|
|
92
|
+
/** External preset file paths or npm package names to load */
|
|
93
|
+
presets: z.array(z.string()).optional(),
|
|
87
94
|
});
|