@sanity/ailf 0.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/config/features.ts +23 -0
- package/config/models.ts +95 -0
- package/config/prompts.ts +16 -0
- package/config/rubrics.ts +225 -0
- package/config/schedules.ts +47 -0
- package/config/sinks.ts +37 -0
- package/config/sources.ts +21 -0
- package/config/thresholds.ts +61 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
- package/dist/_vendor/ailf-core/config-helpers.js +170 -0
- package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
- package/dist/_vendor/ailf-core/env-helper.js +45 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/examples/index.js +25 -0
- package/dist/_vendor/ailf-core/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/index.js +5 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
- package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
- package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
- package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
- package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
- package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
- package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/services/index.js +2 -1
- package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
- package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
- package/dist/_vendor/ailf-core/services/scoring.js +25 -15
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
- package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
- package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
- package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
- package/dist/_vendor/ailf-core/types/index.js +8 -1
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
- package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
- package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
- package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
- package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
- package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
- package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
- package/dist/_vendor/ailf-core/types/trace.js +18 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
- package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
- package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
- package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
- package/dist/_vendor/ailf-shared/index.d.ts +0 -1
- package/dist/_vendor/ailf-shared/index.js +0 -1
- package/dist/adapters/api-client/build-request.js +14 -13
- package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
- package/dist/adapters/config-sources/file-config-adapter.js +39 -12
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +1 -0
- package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
- package/dist/adapters/config-sources/ts-config-loader.js +141 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
- package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
- package/dist/adapters/task-sources/index.d.ts +3 -2
- package/dist/adapters/task-sources/index.js +3 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
- package/dist/adapters/task-sources/repo-schemas.js +227 -19
- package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
- package/dist/adapters/task-sources/repo-task-source.js +92 -80
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
- package/dist/adapters/task-sources/task-file-loader.js +83 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
- package/dist/adapters/task-sources/yaml-task-source.js +19 -16
- package/dist/cli.js +0 -2
- package/dist/commands/baseline.js +4 -1
- package/dist/commands/calculate-scores.js +1 -1
- package/dist/commands/coverage-audit.js +9 -1
- package/dist/commands/explain-handler.js +25 -23
- package/dist/commands/fetch-docs.js +3 -2
- package/dist/commands/generate-configs.js +1 -1
- package/dist/commands/init.d.ts +6 -4
- package/dist/commands/init.js +302 -23
- package/dist/commands/interactive.js +11 -7
- package/dist/commands/pipeline-action.d.ts +2 -0
- package/dist/commands/pipeline-action.js +16 -6
- package/dist/commands/pipeline.d.ts +1 -0
- package/dist/commands/pipeline.js +4 -2
- package/dist/commands/pr-comment.js +1 -1
- package/dist/commands/publish.js +2 -2
- package/dist/commands/readiness-report.js +13 -6
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +13 -1
- package/dist/composition-root.js +99 -4
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/build-step-sequence.js +28 -8
- package/dist/orchestration/steps/calculate-scores-step.js +24 -11
- package/dist/orchestration/steps/fetch-docs-step.js +8 -7
- package/dist/orchestration/steps/gap-analysis-step.js +8 -7
- package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
- package/dist/orchestration/steps/generate-configs-step.js +261 -51
- package/dist/orchestration/steps/grader-consistency-step.js +7 -4
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/readiness-step.js +5 -6
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
- package/dist/orchestration/steps/run-eval-step.js +8 -7
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/cache.js +36 -8
- package/dist/pipeline/calculate-scores.d.ts +2 -4
- package/dist/pipeline/calculate-scores.js +43 -113
- package/dist/pipeline/checks.js +2 -2
- package/dist/pipeline/compare.js +8 -8
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
- package/dist/pipeline/compiler/assertion-mapper.js +175 -0
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
- package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
- package/dist/pipeline/compiler/config-loader.d.ts +56 -0
- package/dist/pipeline/compiler/config-loader.js +111 -0
- package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
- package/dist/pipeline/compiler/fixture-resolver.js +113 -0
- package/dist/pipeline/compiler/hash.d.ts +11 -0
- package/dist/pipeline/compiler/hash.js +18 -0
- package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
- package/dist/pipeline/compiler/ignore-fields.js +113 -0
- package/dist/pipeline/compiler/index.d.ts +29 -0
- package/dist/pipeline/compiler/index.js +45 -0
- package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
- package/dist/pipeline/compiler/literacy-bridge.js +172 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
- package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/index.d.ts +9 -0
- package/dist/pipeline/compiler/presets/index.js +8 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
- package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
- package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
- package/dist/pipeline/compiler/provider-assembler.js +137 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
- package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
- package/dist/pipeline/compiler/sandbox/index.js +11 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
- package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
- package/dist/pipeline/compiler/scoring-bridge.js +114 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
- package/dist/pipeline/compiler/task-graph-builder.js +291 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
- package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
- package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
- package/dist/pipeline/compiler/telemetry/index.js +19 -0
- package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
- package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
- package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
- package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
- package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
- package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
- package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
- package/dist/pipeline/compiler/variable-resolver.js +115 -0
- package/dist/pipeline/coverage-audit.d.ts +15 -5
- package/dist/pipeline/coverage-audit.js +41 -22
- package/dist/pipeline/eval-constants.d.ts +16 -6
- package/dist/pipeline/eval-constants.js +25 -4
- package/dist/pipeline/eval-fingerprint.d.ts +2 -2
- package/dist/pipeline/eval-fingerprint.js +8 -9
- package/dist/pipeline/expand-tasks.d.ts +19 -10
- package/dist/pipeline/expand-tasks.js +34 -28
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +2 -2
- package/dist/pipeline/generate-configs.d.ts +22 -4
- package/dist/pipeline/generate-configs.js +53 -24
- package/dist/pipeline/grader-api.d.ts +3 -3
- package/dist/pipeline/grader-api.js +5 -12
- package/dist/pipeline/grader-compare-runner.js +20 -27
- package/dist/pipeline/grader-comparison.d.ts +4 -8
- package/dist/pipeline/grader-comparison.js +11 -17
- package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
- package/dist/pipeline/grader-consistency-runner.js +16 -20
- package/dist/pipeline/grader-consistency.d.ts +6 -10
- package/dist/pipeline/grader-consistency.js +13 -32
- package/dist/pipeline/grader-sensitivity-runner.js +7 -5
- package/dist/pipeline/grader-sensitivity.d.ts +2 -6
- package/dist/pipeline/grader-sensitivity.js +10 -10
- package/dist/pipeline/grader-validate-runner.js +7 -5
- package/dist/pipeline/grader-validation.d.ts +2 -6
- package/dist/pipeline/grader-validation.js +14 -22
- package/dist/pipeline/map-request-to-config.js +7 -1
- package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
- package/dist/pipeline/mirror-repo-tasks.js +22 -21
- package/dist/pipeline/normalize-mode.d.ts +49 -0
- package/dist/pipeline/normalize-mode.js +64 -0
- package/dist/pipeline/plan.d.ts +5 -2
- package/dist/pipeline/plan.js +134 -78
- package/dist/pipeline/pr-comment.js +2 -0
- package/dist/pipeline/profile-resolution.d.ts +22 -14
- package/dist/pipeline/profile-resolution.js +41 -19
- package/dist/pipeline/provenance.d.ts +2 -2
- package/dist/pipeline/provenance.js +12 -17
- package/dist/pipeline/release-report.js +4 -4
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/rubric-loader.d.ts +20 -0
- package/dist/pipeline/rubric-loader.js +37 -0
- package/dist/pipeline/validate.d.ts +4 -4
- package/dist/pipeline/validate.js +64 -53
- package/dist/schedules/loader.js +18 -8
- package/dist/scripts/migrate-task-mode.d.ts +24 -0
- package/dist/scripts/migrate-task-mode.js +85 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +15 -15
- package/dist/sinks/loader.js +5 -7
- package/dist/sources.d.ts +7 -7
- package/dist/sources.js +22 -24
- package/dist/webhook/dispatch.js +2 -1
- package/package.json +15 -4
- package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
- package/tasks/literacy/frameworks.task.ts +128 -0
- package/tasks/literacy/functions.task.ts +69 -0
- package/tasks/literacy/groq.task.ts +258 -0
- package/tasks/literacy/nextjs-live.task.ts +75 -0
- package/tasks/literacy/studio-setup.task.ts +131 -0
- package/tasks/literacy/visual-editing.task.ts +146 -0
- package/config/features.yaml +0 -116
- package/config/models.yaml +0 -116
- package/config/prompts.yaml +0 -75
- package/config/rubrics.yaml +0 -81
- package/config/schedules.yaml +0 -43
- package/config/sinks.yaml +0 -54
- package/config/sources.yaml +0 -51
- package/config/thresholds.yaml +0 -49
- package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
- package/dist/_vendor/ailf-tasks/cli.js +0 -61
- package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
- package/dist/_vendor/ailf-tasks/index.js +0 -16
- package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
- package/dist/_vendor/ailf-tasks/parser.js +0 -73
- package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
- package/dist/_vendor/ailf-tasks/schemas.js +0 -180
- package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
- package/dist/_vendor/ailf-tasks/validation.js +0 -162
- package/dist/agent-observer/test-imports.d.ts +0 -7
- package/dist/agent-observer/test-imports.js +0 -185
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* validate-tasks command — standalone validation of
|
|
2
|
+
* validate-tasks command — standalone validation of task files.
|
|
3
3
|
*
|
|
4
|
-
* Validates .ailf/tasks/*.yaml files against the
|
|
4
|
+
* Validates .ailf/tasks/*.yaml files against the CanonicalTaskSchema without
|
|
5
5
|
* running the full pipeline. Useful for pre-commit hooks and CI checks
|
|
6
6
|
* in external repos.
|
|
7
7
|
*
|
|
@@ -16,11 +16,11 @@ import { existsSync, readdirSync, readFileSync } from "fs";
|
|
|
16
16
|
import { resolve, relative } from "path";
|
|
17
17
|
import { Command } from "commander";
|
|
18
18
|
import { load } from "js-yaml";
|
|
19
|
-
import {
|
|
20
|
-
import {
|
|
19
|
+
import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "../adapters/task-sources/repo-schemas.js";
|
|
20
|
+
import { validateCanonicalTasks, formatValidationResult, } from "../adapters/task-sources/repo-validation.js";
|
|
21
21
|
export function createValidateTasksCommand() {
|
|
22
22
|
return new Command("validate-tasks")
|
|
23
|
-
.description("Validate
|
|
23
|
+
.description("Validate task YAML files (.ailf/tasks/) against the canonical schema")
|
|
24
24
|
.argument("[path]", "Path to tasks directory (default: .ailf/tasks/)", ".ailf/tasks")
|
|
25
25
|
.option("--strict", "Treat warnings as errors", false)
|
|
26
26
|
.action(async (tasksPath, opts) => {
|
|
@@ -29,12 +29,12 @@ export function createValidateTasksCommand() {
|
|
|
29
29
|
const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
|
|
30
30
|
const resolvedPath = resolve(callerCwd, tasksPath);
|
|
31
31
|
if (!existsSync(resolvedPath)) {
|
|
32
|
-
console.error(
|
|
32
|
+
console.error(`Directory not found: ${resolvedPath}`);
|
|
33
33
|
process.exit(1);
|
|
34
34
|
}
|
|
35
35
|
const yamlFiles = readdirSync(resolvedPath).filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."));
|
|
36
36
|
if (yamlFiles.length === 0) {
|
|
37
|
-
console.error(
|
|
37
|
+
console.error(`No YAML files found in ${resolvedPath}`);
|
|
38
38
|
process.exit(1);
|
|
39
39
|
}
|
|
40
40
|
console.log(`\nValidating ${yamlFiles.length} task file(s) in ${relative(process.cwd(), resolvedPath)}/\n`);
|
|
@@ -50,25 +50,36 @@ export function createValidateTasksCommand() {
|
|
|
50
50
|
}
|
|
51
51
|
catch (err) {
|
|
52
52
|
const msg = err instanceof Error ? err.message : String(err);
|
|
53
|
-
console.error(`
|
|
53
|
+
console.error(` ${file}: YAML parse error`);
|
|
54
54
|
console.error(` ${msg}\n`);
|
|
55
55
|
hasErrors = true;
|
|
56
56
|
continue;
|
|
57
57
|
}
|
|
58
58
|
if (!Array.isArray(parsed)) {
|
|
59
|
-
console.error(`
|
|
59
|
+
console.error(` ${file}: Expected a YAML array of task definitions`);
|
|
60
|
+
hasErrors = true;
|
|
61
|
+
continue;
|
|
62
|
+
}
|
|
63
|
+
// Detect legacy field names before Zod validation
|
|
64
|
+
const legacyWarnings = detectLegacyFieldNames(parsed, file);
|
|
65
|
+
if (legacyWarnings.length > 0) {
|
|
66
|
+
console.error(` ${file}: Uses legacy field names`);
|
|
67
|
+
for (const w of legacyWarnings) {
|
|
68
|
+
console.error(` ${w}`);
|
|
69
|
+
}
|
|
70
|
+
console.error();
|
|
60
71
|
hasErrors = true;
|
|
61
72
|
continue;
|
|
62
73
|
}
|
|
63
74
|
try {
|
|
64
|
-
const tasks =
|
|
65
|
-
console.log(`
|
|
75
|
+
const tasks = parseCanonicalTaskFile(parsed, file);
|
|
76
|
+
console.log(` ${file}: ${tasks.length} task${tasks.length === 1 ? "" : "s"} valid`);
|
|
66
77
|
totalTasks += tasks.length;
|
|
67
78
|
allTasks.push(...tasks);
|
|
68
79
|
}
|
|
69
80
|
catch (err) {
|
|
70
81
|
const msg = err instanceof Error ? err.message : String(err);
|
|
71
|
-
console.error(`
|
|
82
|
+
console.error(` ${file}: Schema validation failed`);
|
|
72
83
|
console.error(`${msg
|
|
73
84
|
.split("\n")
|
|
74
85
|
.map((l) => ` ${l}`)
|
|
@@ -79,7 +90,7 @@ export function createValidateTasksCommand() {
|
|
|
79
90
|
// Run semantic validation on all parsed tasks
|
|
80
91
|
if (allTasks.length > 0) {
|
|
81
92
|
console.log(); // blank line
|
|
82
|
-
const semanticResult =
|
|
93
|
+
const semanticResult = validateCanonicalTasks(allTasks);
|
|
83
94
|
const formatted = formatValidationResult(semanticResult);
|
|
84
95
|
console.log(formatted);
|
|
85
96
|
if (!semanticResult.valid) {
|
|
@@ -87,10 +98,10 @@ export function createValidateTasksCommand() {
|
|
|
87
98
|
}
|
|
88
99
|
if (opts.strict && semanticResult.warnings.length > 0) {
|
|
89
100
|
hasErrors = true;
|
|
90
|
-
console.log("\n
|
|
101
|
+
console.log("\n --strict mode: warnings treated as errors");
|
|
91
102
|
}
|
|
92
103
|
}
|
|
93
|
-
console.log(`\n${hasErrors ? "
|
|
104
|
+
console.log(`\n${hasErrors ? "FAIL" : "OK"} ${totalTasks} task${totalTasks === 1 ? "" : "s"} across ${yamlFiles.length} file${yamlFiles.length === 1 ? "" : "s"}\n`);
|
|
94
105
|
process.exit(hasErrors ? 1 : 0);
|
|
95
106
|
});
|
|
96
107
|
}
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
* @see packages/core/src/ports/context.ts — AppContext interface
|
|
16
16
|
* @see docs/exec-plans/ports-and-adapters/phase-7-composition-root.md
|
|
17
17
|
*/
|
|
18
|
-
import type
|
|
18
|
+
import { type AppContext, type AssertionRegistration, type ResolvedConfig } from "./_vendor/ailf-core/index.d.ts";
|
|
19
19
|
/**
|
|
20
20
|
* Create a fully wired AppContext from resolved configuration.
|
|
21
21
|
*
|
|
@@ -24,3 +24,15 @@ import type { AppContext, ResolvedConfig } from "./_vendor/ailf-core/index.d.ts"
|
|
|
24
24
|
* is a one-line change in this function.
|
|
25
25
|
*/
|
|
26
26
|
export declare function createAppContext(config: ResolvedConfig): AppContext;
|
|
27
|
+
/**
|
|
28
|
+
* Generic Promptfoo assertion types available to all evaluation modes.
|
|
29
|
+
*
|
|
30
|
+
* These are evaluation primitives (text matching, JSON validation, LLM grading)
|
|
31
|
+
* that aren't specific to any mode or domain. They're registered before any
|
|
32
|
+
* preset so every mode has access to them.
|
|
33
|
+
*
|
|
34
|
+
* `compatibleModes` is omitted — when undefined, the assertion is compatible
|
|
35
|
+
* with all modes. Mode-specific assertions can be registered by presets with
|
|
36
|
+
* explicit mode whitelists.
|
|
37
|
+
*/
|
|
38
|
+
export declare const FRAMEWORK_ASSERTIONS: AssertionRegistration[];
|
package/dist/composition-root.js
CHANGED
|
@@ -15,12 +15,15 @@
|
|
|
15
15
|
* @see packages/core/src/ports/context.ts — AppContext interface
|
|
16
16
|
* @see docs/exec-plans/ports-and-adapters/phase-7-composition-root.md
|
|
17
17
|
*/
|
|
18
|
+
import { InMemoryPluginRegistry, } from "./_vendor/ailf-core/index.js";
|
|
18
19
|
import { ContentLakeCacheAdapter } from "./adapters/cache/content-lake-cache.js";
|
|
20
|
+
import { loadExternalPresets } from "./pipeline/compiler/preset-loader.js";
|
|
19
21
|
import { FilesystemCache } from "./adapters/cache/filesystem-cache.js";
|
|
20
|
-
import { SanityDocFetcher } from "./adapters/doc-fetchers/index.js";
|
|
21
22
|
import { PromptfooEvalAdapter } from "./adapters/eval-runners/promptfoo-eval-adapter.js";
|
|
22
23
|
import { ConsoleLogger, JsonLogger, QuietLogger, } from "./adapters/loggers/index.js";
|
|
23
24
|
import { CompositeTaskSource, ContentLakeTaskSource, RepoTaskSource, YamlTaskSource, } from "./adapters/task-sources/index.js";
|
|
25
|
+
import { createAgentHarnessBase, createKnowledgeProbeBase, createLiteracyModeBase, createMcpServerModeBase, } from "./pipeline/compiler/mode-bases/index.js";
|
|
26
|
+
import { createSanityLiteracyPreset } from "./pipeline/compiler/presets/index.js";
|
|
24
27
|
import { getSanityClient } from "./sanity/client.js";
|
|
25
28
|
import { ReportStore } from "./report-store.js";
|
|
26
29
|
import { loadSinks } from "./sinks/index.js";
|
|
@@ -38,13 +41,20 @@ export function createAppContext(config) {
|
|
|
38
41
|
const cache = config.noCache ? undefined : createCache(config);
|
|
39
42
|
// Task source — selected by config.taskSourceType
|
|
40
43
|
const taskSource = createTaskSource(config);
|
|
41
|
-
//
|
|
42
|
-
|
|
44
|
+
// Plugin registry — mode bases, assertions, presets, doc fetcher.
|
|
45
|
+
// External presets from config are loaded and registered after built-ins.
|
|
46
|
+
const externalPresets = config.presets && config.presets.length > 0
|
|
47
|
+
? loadExternalPresets(config.presets, config.rootDir)
|
|
48
|
+
: undefined;
|
|
49
|
+
const registry = createRegistry(config.rootDir, externalPresets);
|
|
50
|
+
// Doc fetcher — provided by the registered preset's factory
|
|
51
|
+
const docFetcherFactory = registry.getDocFetcherFactory();
|
|
52
|
+
const docFetcher = docFetcherFactory ? docFetcherFactory() : undefined;
|
|
43
53
|
// Eval runner — Promptfoo subprocess
|
|
44
54
|
const evalRunner = new PromptfooEvalAdapter(config.rootDir);
|
|
45
55
|
// Report store — Sanity Content Lake (for publish + auto-compare)
|
|
46
56
|
const reportStore = createReportStore(config);
|
|
47
|
-
// Sinks — loaded from config/sinks
|
|
57
|
+
// Sinks — loaded from config/sinks
|
|
48
58
|
const sinks = loadSinks();
|
|
49
59
|
return {
|
|
50
60
|
cache,
|
|
@@ -52,6 +62,7 @@ export function createAppContext(config) {
|
|
|
52
62
|
docFetcher,
|
|
53
63
|
evalRunner,
|
|
54
64
|
logger,
|
|
65
|
+
registry,
|
|
55
66
|
reportStore,
|
|
56
67
|
sinks,
|
|
57
68
|
taskSource,
|
|
@@ -113,6 +124,90 @@ function createTaskSource(config) {
|
|
|
113
124
|
}
|
|
114
125
|
return primary;
|
|
115
126
|
}
|
|
127
|
+
// ---------------------------------------------------------------------------
|
|
128
|
+
// Layer 0: Framework built-in assertions
|
|
129
|
+
// ---------------------------------------------------------------------------
|
|
130
|
+
/**
|
|
131
|
+
* Generic Promptfoo assertion types available to all evaluation modes.
|
|
132
|
+
*
|
|
133
|
+
* These are evaluation primitives (text matching, JSON validation, LLM grading)
|
|
134
|
+
* that aren't specific to any mode or domain. They're registered before any
|
|
135
|
+
* preset so every mode has access to them.
|
|
136
|
+
*
|
|
137
|
+
* `compatibleModes` is omitted — when undefined, the assertion is compatible
|
|
138
|
+
* with all modes. Mode-specific assertions can be registered by presets with
|
|
139
|
+
* explicit mode whitelists.
|
|
140
|
+
*/
|
|
141
|
+
export const FRAMEWORK_ASSERTIONS = [
|
|
142
|
+
{
|
|
143
|
+
type: "contains",
|
|
144
|
+
label: "Contains text",
|
|
145
|
+
handlerModule: "promptfoo:builtin",
|
|
146
|
+
},
|
|
147
|
+
{
|
|
148
|
+
type: "contains-all",
|
|
149
|
+
label: "Contains all texts",
|
|
150
|
+
handlerModule: "promptfoo:builtin",
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
type: "contains-any",
|
|
154
|
+
label: "Contains any text",
|
|
155
|
+
handlerModule: "promptfoo:builtin",
|
|
156
|
+
},
|
|
157
|
+
{ type: "equals", label: "Exact match", handlerModule: "promptfoo:builtin" },
|
|
158
|
+
{ type: "regex", label: "Regex match", handlerModule: "promptfoo:builtin" },
|
|
159
|
+
{ type: "is-json", label: "Valid JSON", handlerModule: "promptfoo:builtin" },
|
|
160
|
+
{
|
|
161
|
+
type: "javascript",
|
|
162
|
+
label: "JavaScript assertion",
|
|
163
|
+
handlerModule: "promptfoo:builtin",
|
|
164
|
+
},
|
|
165
|
+
{
|
|
166
|
+
type: "llm-rubric",
|
|
167
|
+
label: "LLM-graded rubric",
|
|
168
|
+
handlerModule: "promptfoo:builtin",
|
|
169
|
+
},
|
|
170
|
+
{
|
|
171
|
+
type: "similar",
|
|
172
|
+
label: "Semantic similarity",
|
|
173
|
+
handlerModule: "promptfoo:builtin",
|
|
174
|
+
},
|
|
175
|
+
];
|
|
176
|
+
/**
|
|
177
|
+
* Build and populate the plugin registry.
|
|
178
|
+
*
|
|
179
|
+
* Registration follows the five-layer model:
|
|
180
|
+
*
|
|
181
|
+
* Layer 0: Framework built-in assertions (generic Promptfoo builtins)
|
|
182
|
+
* Layer 0.5: Mode bases (shared evaluation methodology per mode)
|
|
183
|
+
* Layer 1: Domain presets (domain-specific config targeting a mode base)
|
|
184
|
+
*
|
|
185
|
+
* Mode bases define HOW you evaluate (rubrics, scoring, prompts).
|
|
186
|
+
* Domain presets define WHAT you evaluate (sources, features, doc fetcher)
|
|
187
|
+
* and target a mode base by ID. When a preset is registered, it inherits
|
|
188
|
+
* its mode base's defaults and can optionally override them.
|
|
189
|
+
*/
|
|
190
|
+
function createRegistry(rootDir, externalPresets) {
|
|
191
|
+
const registry = new InMemoryPluginRegistry();
|
|
192
|
+
// Layer 0: Framework built-in assertions (available to all modes)
|
|
193
|
+
for (const assertion of FRAMEWORK_ASSERTIONS) {
|
|
194
|
+
registry.registerAssertion(assertion);
|
|
195
|
+
}
|
|
196
|
+
// Layer 0.5: Mode bases (evaluation methodology)
|
|
197
|
+
registry.registerModeBase(createLiteracyModeBase());
|
|
198
|
+
registry.registerModeBase(createMcpServerModeBase());
|
|
199
|
+
registry.registerModeBase(createKnowledgeProbeBase());
|
|
200
|
+
registry.registerModeBase(createAgentHarnessBase());
|
|
201
|
+
// Layer 1: Built-in domain presets
|
|
202
|
+
registry.registerPreset(createSanityLiteracyPreset({ rootDir }));
|
|
203
|
+
// Layer 1+: External domain presets (from config.presets)
|
|
204
|
+
if (externalPresets) {
|
|
205
|
+
for (const preset of externalPresets) {
|
|
206
|
+
registry.registerPreset(preset);
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
return registry;
|
|
210
|
+
}
|
|
116
211
|
function createReportStore(config) {
|
|
117
212
|
return new ReportStore({
|
|
118
213
|
dataset: process.env.AILF_REPORT_DATASET ??
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @sanity/ailf — Public API for the AI Literacy Framework.
|
|
3
|
+
*
|
|
4
|
+
* This module is the entry point for external consumers who import from
|
|
5
|
+
* `@sanity/ailf`. It re-exports the authoring API needed to write task
|
|
6
|
+
* definitions, configuration files, and validate task YAML.
|
|
7
|
+
*
|
|
8
|
+
* ## Task authoring
|
|
9
|
+
*
|
|
10
|
+
* ```typescript
|
|
11
|
+
* import { defineTask } from "@sanity/ailf"
|
|
12
|
+
*
|
|
13
|
+
* export default defineTask({
|
|
14
|
+
* id: "groq-projection-basics",
|
|
15
|
+
* mode: "literacy",
|
|
16
|
+
* title: "GROQ Projection Basics",
|
|
17
|
+
* area: "groq",
|
|
18
|
+
* prompt: { text: "Write GROQ queries..." },
|
|
19
|
+
* assertions: [
|
|
20
|
+
* { type: "llm-rubric", template: "task-completion", criteria: ["..."] },
|
|
21
|
+
* ],
|
|
22
|
+
* })
|
|
23
|
+
* ```
|
|
24
|
+
*
|
|
25
|
+
* ## Configuration authoring
|
|
26
|
+
*
|
|
27
|
+
* ```typescript
|
|
28
|
+
* import { defineConfig, env } from "@sanity/ailf"
|
|
29
|
+
*
|
|
30
|
+
* export default defineConfig({
|
|
31
|
+
* projectId: env("SANITY_PROJECT_ID"),
|
|
32
|
+
* dataset: env("SANITY_DATASET"),
|
|
33
|
+
* })
|
|
34
|
+
* ```
|
|
35
|
+
*/
|
|
36
|
+
export { defineConfig, defineFeatures, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.d.ts";
|
|
37
|
+
export type { PricingEntry, PromptEntry, SourceEntry } from "./_vendor/ailf-core/index.d.ts";
|
|
38
|
+
export { env } from "./_vendor/ailf-core/index.d.ts";
|
|
39
|
+
export type { AgentHarnessTaskDefinition, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./_vendor/ailf-core/index.d.ts";
|
|
40
|
+
export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, type CanonicalTask, type CuratedAssertionType, type RubricTemplateName, } from "./adapters/task-sources/repo-schemas.js";
|
|
41
|
+
export { formatValidationResult, validateCanonicalTasks, type ValidationMessage, type ValidationResult, } from "./adapters/task-sources/repo-validation.js";
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @sanity/ailf — Public API for the AI Literacy Framework.
|
|
3
|
+
*
|
|
4
|
+
* This module is the entry point for external consumers who import from
|
|
5
|
+
* `@sanity/ailf`. It re-exports the authoring API needed to write task
|
|
6
|
+
* definitions, configuration files, and validate task YAML.
|
|
7
|
+
*
|
|
8
|
+
* ## Task authoring
|
|
9
|
+
*
|
|
10
|
+
* ```typescript
|
|
11
|
+
* import { defineTask } from "@sanity/ailf"
|
|
12
|
+
*
|
|
13
|
+
* export default defineTask({
|
|
14
|
+
* id: "groq-projection-basics",
|
|
15
|
+
* mode: "literacy",
|
|
16
|
+
* title: "GROQ Projection Basics",
|
|
17
|
+
* area: "groq",
|
|
18
|
+
* prompt: { text: "Write GROQ queries..." },
|
|
19
|
+
* assertions: [
|
|
20
|
+
* { type: "llm-rubric", template: "task-completion", criteria: ["..."] },
|
|
21
|
+
* ],
|
|
22
|
+
* })
|
|
23
|
+
* ```
|
|
24
|
+
*
|
|
25
|
+
* ## Configuration authoring
|
|
26
|
+
*
|
|
27
|
+
* ```typescript
|
|
28
|
+
* import { defineConfig, env } from "@sanity/ailf"
|
|
29
|
+
*
|
|
30
|
+
* export default defineConfig({
|
|
31
|
+
* projectId: env("SANITY_PROJECT_ID"),
|
|
32
|
+
* dataset: env("SANITY_DATASET"),
|
|
33
|
+
* })
|
|
34
|
+
* ```
|
|
35
|
+
*/
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// Configuration helpers (define* identity functions for typed authoring)
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
export { defineConfig, defineFeatures, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.js";
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
// Environment helper
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
export { env } from "./_vendor/ailf-core/index.js";
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
// Validation — for programmatic validation of task YAML
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
export { CanonicalTaskFileSchema, CanonicalTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, RUBRIC_TEMPLATE_NAMES, } from "./adapters/task-sources/repo-schemas.js";
|
|
48
|
+
export { formatValidationResult, validateCanonicalTasks, } from "./adapters/task-sources/repo-validation.js";
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* PipelineStep objects determined by config flags like skipFetch,
|
|
6
6
|
* skipEval, compareEnabled, etc.
|
|
7
7
|
*/
|
|
8
|
-
import {
|
|
8
|
+
import { LiteracyVariant } from "../pipeline/normalize-mode.js";
|
|
9
9
|
import { CallbackStep } from "./steps/callback-step.js";
|
|
10
10
|
import { CalculateScoresStep } from "./steps/calculate-scores-step.js";
|
|
11
11
|
import { CompareStep } from "./steps/compare-step.js";
|
|
@@ -35,16 +35,36 @@ export function buildStepSequence(ctx, pipelineStart = Date.now()) {
|
|
|
35
35
|
if (config.repoTasksPath) {
|
|
36
36
|
steps.push(new MirrorRepoTasksStep());
|
|
37
37
|
}
|
|
38
|
-
// Step 1: Fetch documentation (
|
|
39
|
-
|
|
38
|
+
// Step 1: Fetch documentation (literacy mode only — other modes don't use canonical docs)
|
|
39
|
+
if (config.mode === "literacy") {
|
|
40
|
+
steps.push(new FetchDocsStep());
|
|
41
|
+
}
|
|
40
42
|
// Step 2: Generate Promptfoo configs
|
|
41
43
|
steps.push(new GenerateConfigsStep());
|
|
42
44
|
// Step 3: Run evaluation (steps handle --skip-eval internally)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
//
|
|
46
|
+
// For literacy mode, the variant determines how many eval steps run:
|
|
47
|
+
// "full" → baseline + agentic (two steps)
|
|
48
|
+
// "baseline" / "agentic" / "observed" → one step
|
|
49
|
+
// undefined → defaults to baseline
|
|
50
|
+
//
|
|
51
|
+
// For all other modes, one eval step per mode.
|
|
52
|
+
if (config.mode === "literacy") {
|
|
53
|
+
const variant = config.variant ?? LiteracyVariant.STANDARD;
|
|
54
|
+
if (variant === LiteracyVariant.FULL) {
|
|
55
|
+
for (const submode of [
|
|
56
|
+
LiteracyVariant.STANDARD,
|
|
57
|
+
LiteracyVariant.AGENTIC,
|
|
58
|
+
]) {
|
|
59
|
+
steps.push(new RunEvalStep(submode));
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
else {
|
|
63
|
+
steps.push(new RunEvalStep(variant));
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
else {
|
|
67
|
+
steps.push(new RunEvalStep(config.mode));
|
|
48
68
|
}
|
|
49
69
|
// Step 3c: Grader consistency (optional, conditional)
|
|
50
70
|
if (config.graderReplications) {
|
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
* typed options derived from AppContext. No env bridge needed.
|
|
6
6
|
*/
|
|
7
7
|
import { join } from "path";
|
|
8
|
-
import {
|
|
8
|
+
import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
|
|
9
9
|
import { getStepInputPaths } from "../../pipeline/cache.js";
|
|
10
10
|
import { calculateAndWriteScores } from "../../pipeline/calculate-scores.js";
|
|
11
11
|
import { checkResultsExist, checkScoreSummaryValid, } from "../../pipeline/checks.js";
|
|
12
|
-
import {
|
|
12
|
+
import { resultsFileForMode } from "../../pipeline/eval-constants.js";
|
|
13
13
|
import { loadSource } from "../../sources.js";
|
|
14
14
|
import { configToSourceOverrides } from "../config-to-source-overrides.js";
|
|
15
15
|
export class CalculateScoresStep {
|
|
@@ -23,10 +23,14 @@ export class CalculateScoresStep {
|
|
|
23
23
|
// score-summary.json was already restored from the cached report.
|
|
24
24
|
// Skip re-calculation — the raw eval-results files don't exist.
|
|
25
25
|
if (state.remoteCacheHits?.size) {
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
26
|
+
// For literacy mode, determine required eval runs from variant
|
|
27
|
+
const variant = ctx.config.variant ?? LiteracyVariant.STANDARD;
|
|
28
|
+
const requiredRuns = ctx.config.mode === "literacy" && variant === LiteracyVariant.FULL
|
|
29
|
+
? [LiteracyVariant.STANDARD, LiteracyVariant.AGENTIC]
|
|
30
|
+
: ctx.config.mode === "literacy"
|
|
31
|
+
? [variant]
|
|
32
|
+
: [ctx.config.mode];
|
|
33
|
+
const allCached = requiredRuns.every((m) => state.remoteCacheHits.has(m));
|
|
30
34
|
if (allCached) {
|
|
31
35
|
// Verify the restored score-summary.json is valid
|
|
32
36
|
const summaryIssues = checkScoreSummaryValid(ctx.config.rootDir);
|
|
@@ -40,10 +44,15 @@ export class CalculateScoresStep {
|
|
|
40
44
|
// If the summary is invalid, fall through to normal calculation
|
|
41
45
|
}
|
|
42
46
|
}
|
|
43
|
-
|
|
44
|
-
|
|
47
|
+
// Primary results file to score.
|
|
48
|
+
// For literacy: "full" variant uses baseline as primary; others use variant directly.
|
|
49
|
+
// For other modes: use the mode name.
|
|
50
|
+
const primaryResultsRun = ctx.config.mode === "literacy"
|
|
51
|
+
? ctx.config.variant === LiteracyVariant.FULL
|
|
52
|
+
? LiteracyVariant.STANDARD
|
|
53
|
+
: (ctx.config.variant ?? LiteracyVariant.STANDARD)
|
|
45
54
|
: ctx.config.mode;
|
|
46
|
-
const resultsFile =
|
|
55
|
+
const resultsFile = resultsFileForMode(primaryResultsRun);
|
|
47
56
|
// Precondition: results file exists
|
|
48
57
|
const resultsIssues = checkResultsExist(ctx.config.rootDir, resultsFile);
|
|
49
58
|
const resultsErrors = resultsIssues.filter((i) => i.severity === "error");
|
|
@@ -68,9 +77,13 @@ export class CalculateScoresStep {
|
|
|
68
77
|
const result = calculateAndWriteScores({
|
|
69
78
|
allowedOrigins: ctx.config.allowedOrigins,
|
|
70
79
|
logger: ctx.logger,
|
|
71
|
-
|
|
80
|
+
// Pass the variant for literacy (scoring uses it to decide
|
|
81
|
+
// whether to read agentic results), or mode for other modes
|
|
82
|
+
mode: ctx.config.mode === "literacy"
|
|
83
|
+
? (ctx.config.variant ?? LiteracyVariant.STANDARD)
|
|
84
|
+
: ctx.config.mode,
|
|
72
85
|
resolvedSource,
|
|
73
|
-
resultsPath:
|
|
86
|
+
resultsPath: primaryResultsRun !== LiteracyVariant.STANDARD
|
|
74
87
|
? join(ctx.config.rootDir, resultsFile)
|
|
75
88
|
: undefined,
|
|
76
89
|
rootDir: ctx.config.rootDir,
|
|
@@ -28,13 +28,14 @@ export class FetchDocsStep {
|
|
|
28
28
|
}
|
|
29
29
|
const start = Date.now();
|
|
30
30
|
// Precondition: at least one task has canonical doc mappings
|
|
31
|
-
const
|
|
32
|
-
|
|
31
|
+
const allTasks = await ctx.taskSource.loadTasks(buildFilter(ctx));
|
|
32
|
+
// Bridge: narrow to literacy tasks for canonical doc access
|
|
33
|
+
const literacyTasks = allTasks.filter((t) => t.mode === "literacy");
|
|
34
|
+
const tasksWithDocs = literacyTasks.filter((t) => (t.context?.docs?.length ?? 0) > 0);
|
|
33
35
|
if (tasksWithDocs.length === 0) {
|
|
34
36
|
return {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
status: "failed",
|
|
37
|
+
status: "skipped",
|
|
38
|
+
reason: "No literacy tasks with canonical_docs — nothing to fetch",
|
|
38
39
|
};
|
|
39
40
|
}
|
|
40
41
|
// Resolve source once with typed overrides
|
|
@@ -100,7 +101,7 @@ export class FetchDocsStep {
|
|
|
100
101
|
if (resolvedSource.perspective &&
|
|
101
102
|
releaseImpact &&
|
|
102
103
|
!ctx.config.noAutoScope) {
|
|
103
|
-
const autoScope = computeAutoScope(
|
|
104
|
+
const autoScope = computeAutoScope(literacyTasks, releaseImpact, resolvedSource.perspective);
|
|
104
105
|
if (autoScope) {
|
|
105
106
|
state.releaseAutoScope = autoScope;
|
|
106
107
|
logAutoScope(autoScope);
|
|
@@ -171,7 +172,7 @@ function writeMetadataFiles(rootDir, metadata) {
|
|
|
171
172
|
*/
|
|
172
173
|
function extractSlugsFromTask(task) {
|
|
173
174
|
const slugs = [];
|
|
174
|
-
for (const ref of task.
|
|
175
|
+
for (const ref of task.context?.docs ?? []) {
|
|
175
176
|
if (isSlugRef(ref)) {
|
|
176
177
|
slugs.push(ref.slug);
|
|
177
178
|
}
|
|
@@ -89,7 +89,7 @@ export class GapAnalysisStep {
|
|
|
89
89
|
const areaToDocRefs = new Map();
|
|
90
90
|
let tasks = [];
|
|
91
91
|
try {
|
|
92
|
-
tasks = await ctx.taskSource.loadTasks();
|
|
92
|
+
tasks = (await ctx.taskSource.loadTasks()).filter((t) => t.mode === "literacy");
|
|
93
93
|
}
|
|
94
94
|
catch {
|
|
95
95
|
// TaskSource may not be available in all contexts (e.g., standalone
|
|
@@ -99,17 +99,18 @@ export class GapAnalysisStep {
|
|
|
99
99
|
// Group tasks by feature area and build slug maps
|
|
100
100
|
const byArea = new Map();
|
|
101
101
|
for (const task of tasks) {
|
|
102
|
-
const slugs = extractSlugsFromRefs(task.
|
|
102
|
+
const slugs = extractSlugsFromRefs(task.context?.docs ?? []);
|
|
103
103
|
const refs = resolveRefs(slugs);
|
|
104
|
-
// Map by
|
|
105
|
-
descToDocRefs.set(task.
|
|
104
|
+
// Map by title (what judgments use as taskId)
|
|
105
|
+
descToDocRefs.set(task.title, refs);
|
|
106
106
|
// Also map by task ID for prefix-based matching
|
|
107
107
|
descToDocRefs.set(task.id, refs);
|
|
108
108
|
// Group slugs by feature area
|
|
109
|
-
|
|
110
|
-
|
|
109
|
+
const area = task.area ?? "";
|
|
110
|
+
if (!byArea.has(area))
|
|
111
|
+
byArea.set(area, new Set());
|
|
111
112
|
for (const s of slugs)
|
|
112
|
-
byArea.get(
|
|
113
|
+
byArea.get(area).add(s);
|
|
113
114
|
}
|
|
114
115
|
for (const [area, slugs] of byArea) {
|
|
115
116
|
areaToDocRefs.set(area, resolveRefs([...slugs]));
|
|
@@ -1,14 +1,27 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Pipeline step: Generate Promptfoo configuration files.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* ALL modes route through the Plugin Registry. The step looks up the mode
|
|
5
|
+
* handler via ctx.registry.getMode() and delegates compilation to it.
|
|
6
|
+
*
|
|
7
|
+
* Literacy mode has a variant strategy: baseline/agentic/observed/full.
|
|
8
|
+
* When the variant is "full", the handler is called twice (baseline + agentic)
|
|
9
|
+
* and three YAML files are written. Other modes produce one YAML file.
|
|
7
10
|
*/
|
|
8
11
|
import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
9
12
|
export declare class GenerateConfigsStep implements PipelineStep {
|
|
10
13
|
readonly name = "generate-configs";
|
|
11
14
|
check(ctx: AppContext): ValidationIssue[];
|
|
12
15
|
execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
|
|
16
|
+
private compileLiteracyVariants;
|
|
17
|
+
private compileSingleMode;
|
|
18
|
+
private loadTasks;
|
|
19
|
+
private applyFilters;
|
|
20
|
+
/**
|
|
21
|
+
* Compile all tasks through a handler, merging results.
|
|
22
|
+
* For literacy mode, ctx can carry evalMode as an extension.
|
|
23
|
+
*/
|
|
24
|
+
private compileAll;
|
|
25
|
+
private checkLiteracyPostconditions;
|
|
13
26
|
cacheInputs(ctx: AppContext): string[];
|
|
14
27
|
}
|