@sanity/ailf 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +29 -12
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
- package/dist/_vendor/ailf-core/config-helpers.js +51 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
- package/dist/_vendor/ailf-core/examples/index.js +213 -94
- package/dist/_vendor/ailf-core/index.d.ts +3 -2
- package/dist/_vendor/ailf-core/index.js +2 -1
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +7 -1
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
- package/dist/adapters/task-sources/index.d.ts +3 -4
- package/dist/adapters/task-sources/index.js +3 -4
- package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
- package/dist/adapters/task-sources/repo-schemas.js +228 -20
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
- package/dist/adapters/task-sources/task-file-loader.js +21 -7
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +5 -4
- package/dist/commands/init.js +190 -25
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +15 -4
- package/dist/composition-root.js +100 -55
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +32 -19
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +77 -26
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +51 -31
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
- package/dist/pipeline/compiler/literacy-bridge.js +2 -2
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
- package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
- package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +16 -9
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +16 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
- package/dist/pipeline/mirror-repo-tasks.js +10 -10
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +68 -30
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +32 -24
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -9,7 +9,6 @@
|
|
|
9
9
|
import assert from "node:assert/strict";
|
|
10
10
|
import { describe, it } from "node:test";
|
|
11
11
|
import { tmpdir } from "os";
|
|
12
|
-
import { LiteracyVariant } from "../../normalize-mode.js";
|
|
13
12
|
import { compileToPromptfoo } from "../promptfoo-compiler.js";
|
|
14
13
|
// ---------------------------------------------------------------------------
|
|
15
14
|
// Helpers
|
|
@@ -123,8 +122,8 @@ describe("compileToPromptfoo", () => {
|
|
|
123
122
|
const result = compileToPromptfoo(graph, {
|
|
124
123
|
mode: "literacy",
|
|
125
124
|
models: makeModels([
|
|
126
|
-
{ id: "model-a", label: "A", modes: [
|
|
127
|
-
{ id: "model-b", label: "B", modes: ["
|
|
125
|
+
{ id: "model-a", label: "A", modes: ["literacy"] },
|
|
126
|
+
{ id: "model-b", label: "B", modes: ["mcp-server"] },
|
|
128
127
|
]),
|
|
129
128
|
rootDir: tmpdir(),
|
|
130
129
|
});
|
|
@@ -262,12 +262,14 @@ describe("InMemoryPluginRegistry", () => {
|
|
|
262
262
|
});
|
|
263
263
|
assert.equal(registry.getAssertions().length, 1);
|
|
264
264
|
});
|
|
265
|
-
it("registers a complete preset", () => {
|
|
265
|
+
it("registers a complete preset with mode base", () => {
|
|
266
266
|
const registry = new InMemoryPluginRegistry();
|
|
267
|
+
// Must register mode base first
|
|
268
|
+
const { createLiteracyModeBase } = require("../mode-bases/literacy.js");
|
|
269
|
+
registry.registerModeBase(createLiteracyModeBase());
|
|
267
270
|
registry.registerPreset(sanityLiteracyPreset);
|
|
268
|
-
//
|
|
271
|
+
// Mode + rubrics from mode base, domain config from preset
|
|
269
272
|
assert.ok(registry.getMode("literacy"));
|
|
270
|
-
assert.ok(registry.getAssertions().length > 0);
|
|
271
273
|
assert.ok(registry.getRubricTemplates().length > 0);
|
|
272
274
|
assert.ok(registry.getPresets().length === 1);
|
|
273
275
|
});
|
|
@@ -280,83 +282,21 @@ describe("sanityLiteracyPreset", () => {
|
|
|
280
282
|
assert.equal(sanityLiteracyPreset.name, "sanity-literacy");
|
|
281
283
|
assert.equal(sanityLiteracyPreset.manifest.pluginApiVersion, 1);
|
|
282
284
|
});
|
|
283
|
-
it("
|
|
284
|
-
assert.equal(sanityLiteracyPreset.
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
assert.
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
assert.equal(sanityLiteracyPreset.rubricTemplates?.length, 3);
|
|
295
|
-
const ids = sanityLiteracyPreset.rubricTemplates.map((t) => t.id);
|
|
296
|
-
assert.ok(ids.includes("task-completion"));
|
|
297
|
-
assert.ok(ids.includes("code-correctness"));
|
|
298
|
-
assert.ok(ids.includes("doc-coverage"));
|
|
299
|
-
});
|
|
300
|
-
it("rubric template scales match config/rubrics.ts authoritative source", () => {
|
|
301
|
-
const templates = sanityLiteracyPreset.rubricTemplates;
|
|
302
|
-
const tc = templates.find((t) => t.id === "task-completion");
|
|
303
|
-
assert.deepEqual(tc.scale, [
|
|
304
|
-
"0: Couldn't attempt — missing critical information",
|
|
305
|
-
"20: Attempted but fundamentally wrong approach",
|
|
306
|
-
"50: Partial implementation — major functional gaps",
|
|
307
|
-
"80: Mostly complete — minor issues or missing edge cases",
|
|
308
|
-
"100: Fully functional code — works as expected",
|
|
309
|
-
]);
|
|
310
|
-
assert.equal(tc.criteriaLabel, "Must demonstrate:");
|
|
311
|
-
const cc = templates.find((t) => t.id === "code-correctness");
|
|
312
|
-
assert.deepEqual(cc.scale, [
|
|
313
|
-
"0: Broken code, syntax errors, or deprecated APIs",
|
|
314
|
-
"30: Works but uses anti-patterns or inefficient approaches",
|
|
315
|
-
"50: Works but not idiomatic",
|
|
316
|
-
"80: Follows most best practices",
|
|
317
|
-
"100: Follows all best practices, idiomatic implementation",
|
|
318
|
-
]);
|
|
319
|
-
assert.equal(cc.criteriaLabel, "Check for:");
|
|
320
|
-
const dc = templates.find((t) => t.id === "doc-coverage");
|
|
321
|
-
assert.deepEqual(dc.scale, [
|
|
322
|
-
"0: Had to hallucinate/guess most implementation details",
|
|
323
|
-
"30: Significant gaps — filled with assumptions",
|
|
324
|
-
"50: Some gaps — inferred from partial information",
|
|
325
|
-
"80: Minor gaps — almost everything was documented",
|
|
326
|
-
"100: Complete coverage — all necessary info was in docs",
|
|
327
|
-
]);
|
|
285
|
+
it("targets literacy mode base", () => {
|
|
286
|
+
assert.equal(sanityLiteracyPreset.mode, "literacy");
|
|
287
|
+
});
|
|
288
|
+
it("does not bundle assertions (now framework built-ins)", () => {
|
|
289
|
+
assert.equal(sanityLiteracyPreset.assertions, undefined);
|
|
290
|
+
});
|
|
291
|
+
it("does not bundle rubrics/scoring/prompts (now in literacy mode base)", () => {
|
|
292
|
+
// Evaluation methodology moved to mode-bases/literacy.ts
|
|
293
|
+
assert.equal(sanityLiteracyPreset.rubricTemplates, undefined);
|
|
294
|
+
assert.equal(sanityLiteracyPreset.scoringProfiles, undefined);
|
|
295
|
+
assert.equal(sanityLiteracyPreset.promptTemplates, undefined);
|
|
328
296
|
});
|
|
329
297
|
it("includes sanity:// fixture resolver", () => {
|
|
330
298
|
assert.ok(sanityLiteracyPreset.fixtureResolvers?.some((r) => r.scheme === "sanity://"));
|
|
331
299
|
});
|
|
332
|
-
it("includes 3 prompt templates", () => {
|
|
333
|
-
const templates = sanityLiteracyPreset.promptTemplates;
|
|
334
|
-
assert.ok(templates);
|
|
335
|
-
assert.ok(templates["with-docs"]);
|
|
336
|
-
assert.ok(templates["without-docs"]);
|
|
337
|
-
assert.ok(templates["agentic"]);
|
|
338
|
-
assert.equal(Object.keys(templates).length, 3);
|
|
339
|
-
});
|
|
340
|
-
it("prompt template content matches literacy handler", () => {
|
|
341
|
-
const templates = sanityLiteracyPreset.promptTemplates;
|
|
342
|
-
assert.ok(templates["with-docs"].template.includes("{{docs}}"));
|
|
343
|
-
assert.ok(templates["with-docs"].template.includes("{{task}}"));
|
|
344
|
-
assert.ok(templates["without-docs"].template.includes("{{task}}"));
|
|
345
|
-
assert.ok(templates["agentic"].template.includes("{{task}}"));
|
|
346
|
-
});
|
|
347
|
-
it("includes default and output-only scoring profiles", () => {
|
|
348
|
-
const profiles = sanityLiteracyPreset.scoringProfiles;
|
|
349
|
-
assert.ok(profiles);
|
|
350
|
-
assert.deepEqual(profiles["default"], {
|
|
351
|
-
"task-completion": 0.5,
|
|
352
|
-
"code-correctness": 0.25,
|
|
353
|
-
"doc-coverage": 0.25,
|
|
354
|
-
});
|
|
355
|
-
assert.deepEqual(profiles["output-only"], {
|
|
356
|
-
"task-completion": 0.6,
|
|
357
|
-
"code-correctness": 0.4,
|
|
358
|
-
});
|
|
359
|
-
});
|
|
360
300
|
it("includes 3 source definitions", () => {
|
|
361
301
|
const sources = sanityLiteracyPreset.sourceDefs;
|
|
362
302
|
assert.ok(sources);
|
|
@@ -376,26 +316,13 @@ describe("sanityLiteracyPreset", () => {
|
|
|
376
316
|
assert.ok(features);
|
|
377
317
|
assert.equal(features.features.length, 14);
|
|
378
318
|
const ids = features.features.map((f) => f.id);
|
|
379
|
-
// Covered features
|
|
380
319
|
assert.ok(ids.includes("groq"));
|
|
381
320
|
assert.ok(ids.includes("visual-editing"));
|
|
382
|
-
assert.ok(ids.includes("nextjs-live"));
|
|
383
|
-
assert.ok(ids.includes("functions"));
|
|
384
|
-
assert.ok(ids.includes("studio-setup"));
|
|
385
|
-
assert.ok(ids.includes("frameworks"));
|
|
386
|
-
// Uncovered features
|
|
387
321
|
assert.ok(ids.includes("portable-text"));
|
|
388
|
-
assert.ok(ids.includes("image-assets"));
|
|
389
|
-
assert.ok(ids.includes("mutations"));
|
|
390
|
-
assert.ok(ids.includes("schemas"));
|
|
391
|
-
assert.ok(ids.includes("authentication"));
|
|
392
|
-
assert.ok(ids.includes("webhooks"));
|
|
393
|
-
assert.ok(ids.includes("realtime"));
|
|
394
322
|
assert.ok(ids.includes("ai-assist"));
|
|
395
323
|
});
|
|
396
324
|
it("includes a docFetcher factory", () => {
|
|
397
325
|
assert.equal(typeof sanityLiteracyPreset.docFetcher, "function");
|
|
398
|
-
// The factory should return a SanityDocFetcher instance
|
|
399
326
|
const fetcher = sanityLiteracyPreset.docFetcher();
|
|
400
327
|
assert.ok(fetcher);
|
|
401
328
|
assert.equal(typeof fetcher.fetch, "function");
|
|
@@ -405,28 +332,34 @@ describe("sanityLiteracyPreset", () => {
|
|
|
405
332
|
// createSanityLiteracyPreset factory
|
|
406
333
|
// ---------------------------------------------------------------------------
|
|
407
334
|
describe("createSanityLiteracyPreset", () => {
|
|
408
|
-
it("returns a preset
|
|
335
|
+
it("returns a domain-only preset targeting literacy mode", () => {
|
|
409
336
|
const preset = createSanityLiteracyPreset({ rootDir: "/tmp/test" });
|
|
410
337
|
assert.equal(preset.name, "sanity-literacy");
|
|
411
|
-
assert.
|
|
412
|
-
|
|
413
|
-
assert.ok(preset.rubricTemplates);
|
|
338
|
+
assert.equal(preset.mode, "literacy");
|
|
339
|
+
// Domain config present
|
|
414
340
|
assert.ok(preset.fixtureResolvers);
|
|
415
|
-
assert.ok(preset.promptTemplates);
|
|
416
|
-
assert.ok(preset.scoringProfiles);
|
|
417
341
|
assert.ok(preset.docFetcher);
|
|
418
342
|
assert.ok(preset.sourceDefs);
|
|
419
343
|
assert.ok(preset.featureDefs);
|
|
344
|
+
// Methodology inherited from mode base, not on preset
|
|
345
|
+
assert.equal(preset.rubricTemplates, undefined);
|
|
346
|
+
assert.equal(preset.scoringProfiles, undefined);
|
|
347
|
+
assert.equal(preset.promptTemplates, undefined);
|
|
420
348
|
});
|
|
421
|
-
it("registers all extension points
|
|
349
|
+
it("registers all extension points via mode base + domain config", () => {
|
|
422
350
|
const registry = new InMemoryPluginRegistry();
|
|
351
|
+
// Must register mode base first (composition root does this)
|
|
352
|
+
const { createLiteracyModeBase } = require("../mode-bases/literacy.js");
|
|
353
|
+
registry.registerModeBase(createLiteracyModeBase());
|
|
423
354
|
const preset = createSanityLiteracyPreset({ rootDir: "/tmp/test" });
|
|
424
355
|
registry.registerPreset(preset);
|
|
356
|
+
// Mode from mode base
|
|
425
357
|
assert.ok(registry.getMode("literacy"));
|
|
426
|
-
|
|
427
|
-
assert.
|
|
428
|
-
assert.
|
|
429
|
-
assert.
|
|
358
|
+
// Rubrics, scoring, prompts inherited from mode base
|
|
359
|
+
assert.equal(registry.getRubricTemplates().length, 3);
|
|
360
|
+
assert.equal(Object.keys(registry.getPromptTemplates()).length, 3);
|
|
361
|
+
assert.equal(Object.keys(registry.getScoringProfiles()).length, 2);
|
|
362
|
+
// Domain config from preset
|
|
430
363
|
assert.ok(registry.getDocFetcherFactory());
|
|
431
364
|
assert.equal(registry.getSourceDefs().length, 3);
|
|
432
365
|
assert.ok(registry.getFeatureDefs());
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* task-bridge.test.ts — Unit tests for the TaskDefinition ↔ LiteracyTaskDefinition bridge.
|
|
3
|
+
*
|
|
4
|
+
* Covers round-trip fidelity, edge cases (missing optionals, all optionals),
|
|
5
|
+
* assertion type mapping, and all four CanonicalDocRef / GeneralizedDocRef variants.
|
|
6
|
+
*
|
|
7
|
+
* Run: npx tsx --test src/pipeline/compiler/__tests__/task-bridge.test.ts
|
|
8
|
+
*/
|
|
9
|
+
export {};
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* task-bridge.test.ts — Unit tests for the TaskDefinition ↔ LiteracyTaskDefinition bridge.
|
|
3
|
+
*
|
|
4
|
+
* Covers round-trip fidelity, edge cases (missing optionals, all optionals),
|
|
5
|
+
* assertion type mapping, and all four CanonicalDocRef / GeneralizedDocRef variants.
|
|
6
|
+
*
|
|
7
|
+
* Run: npx tsx --test src/pipeline/compiler/__tests__/task-bridge.test.ts
|
|
8
|
+
*/
|
|
9
|
+
import assert from "node:assert/strict";
|
|
10
|
+
import { describe, it } from "node:test";
|
|
11
|
+
import { toGeneralized, toLiteracyTask } from "../task-bridge.js";
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Fixtures
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
/** Minimal old-style task — only required fields, no optionals */
|
|
16
|
+
const minimalOldTask = {
|
|
17
|
+
id: "groq-filter-basic",
|
|
18
|
+
description: "Filter documents with GROQ",
|
|
19
|
+
featureArea: "groq",
|
|
20
|
+
taskPrompt: "Write a GROQ query that filters by _type",
|
|
21
|
+
canonicalDocs: [],
|
|
22
|
+
referenceSolution: "solutions/groq-filter-basic.ts",
|
|
23
|
+
docCoverage: false,
|
|
24
|
+
assertions: [],
|
|
25
|
+
};
|
|
26
|
+
/** Fully-populated old-style task — every optional filled */
|
|
27
|
+
const fullOldTask = {
|
|
28
|
+
id: "mutations-create-advanced",
|
|
29
|
+
description: "Create documents with mutations API",
|
|
30
|
+
featureArea: "mutations",
|
|
31
|
+
taskPrompt: "Use the mutations API to create a document with references",
|
|
32
|
+
canonicalDocs: [
|
|
33
|
+
{ slug: "mutations-overview", reason: "Primary mutations guide" },
|
|
34
|
+
{ path: "/docs/mutations/create", reason: "Create-specific docs" },
|
|
35
|
+
{
|
|
36
|
+
id: "doc-123",
|
|
37
|
+
reason: "Imported draft",
|
|
38
|
+
slug: "draft-slug",
|
|
39
|
+
path: "/docs/draft",
|
|
40
|
+
},
|
|
41
|
+
{ perspective: "release-v3", reason: "V3 release content" },
|
|
42
|
+
],
|
|
43
|
+
referenceSolution: "solutions/mutations-create-advanced.ts",
|
|
44
|
+
docCoverage: true,
|
|
45
|
+
assertions: [
|
|
46
|
+
{
|
|
47
|
+
type: "llm-rubric",
|
|
48
|
+
template: "code-quality",
|
|
49
|
+
criteria: ["correct", "idiomatic"],
|
|
50
|
+
weight: 2,
|
|
51
|
+
},
|
|
52
|
+
{ type: "contains", value: "createIfNotExists" },
|
|
53
|
+
{ type: "javascript", value: "output.includes('mutation')", weight: 1 },
|
|
54
|
+
],
|
|
55
|
+
baseline: { enabled: true, rubric: "full" },
|
|
56
|
+
tags: ["mutations", "advanced", "references"],
|
|
57
|
+
status: "active",
|
|
58
|
+
extraVars: { customHint: "Use createIfNotExists", maxRetries: 3 },
|
|
59
|
+
};
|
|
60
|
+
/** Minimal new-style literacy task — only required/mode fields */
|
|
61
|
+
const minimalNewTask = {
|
|
62
|
+
mode: "literacy",
|
|
63
|
+
id: "studio-config-basic",
|
|
64
|
+
title: "Configure a Sanity Studio",
|
|
65
|
+
};
|
|
66
|
+
/** Fully-populated new-style literacy task (only fields that round-trip through old type) */
|
|
67
|
+
const fullNewTask = {
|
|
68
|
+
mode: "literacy",
|
|
69
|
+
id: "groq-projection-advanced",
|
|
70
|
+
title: "Advanced GROQ projections",
|
|
71
|
+
area: "groq",
|
|
72
|
+
tags: ["groq", "projections"],
|
|
73
|
+
status: "active",
|
|
74
|
+
assertions: [
|
|
75
|
+
{
|
|
76
|
+
type: "llm-rubric",
|
|
77
|
+
template: "completeness",
|
|
78
|
+
criteria: ["covers edge cases"],
|
|
79
|
+
weight: 3,
|
|
80
|
+
},
|
|
81
|
+
{ type: "contains", value: "coalesce" },
|
|
82
|
+
],
|
|
83
|
+
prompt: {
|
|
84
|
+
text: "Write a GROQ query using projections with coalesce",
|
|
85
|
+
vars: { difficulty: "advanced", topic: "projections" },
|
|
86
|
+
},
|
|
87
|
+
context: {
|
|
88
|
+
docs: [
|
|
89
|
+
{ slug: "groq-projections", reason: "Projection docs" },
|
|
90
|
+
{ path: "/docs/groq/projections", reason: "Path-based ref" },
|
|
91
|
+
{ id: "groq-doc-456", reason: "By ID" },
|
|
92
|
+
{ perspective: "release-groq-v2", reason: "GROQ v2 release" },
|
|
93
|
+
],
|
|
94
|
+
},
|
|
95
|
+
referenceSolution: "solutions/groq-projection-advanced.ts",
|
|
96
|
+
docCoverage: true,
|
|
97
|
+
baseline: { enabled: false, rubric: "abbreviated" },
|
|
98
|
+
};
|
|
99
|
+
// ---------------------------------------------------------------------------
|
|
100
|
+
// toGeneralized — old TaskDefinition → LiteracyTaskDefinition
|
|
101
|
+
// ---------------------------------------------------------------------------
|
|
102
|
+
describe("toGeneralized", () => {
|
|
103
|
+
it("converts a minimal old task to a LiteracyTaskDefinition", () => {
|
|
104
|
+
const result = toGeneralized(minimalOldTask);
|
|
105
|
+
assert.equal(result.mode, "literacy");
|
|
106
|
+
assert.equal(result.id, "groq-filter-basic");
|
|
107
|
+
assert.equal(result.title, "Filter documents with GROQ");
|
|
108
|
+
assert.equal(result.area, "groq");
|
|
109
|
+
assert.equal(result.referenceSolution, "solutions/groq-filter-basic.ts");
|
|
110
|
+
assert.equal(result.docCoverage, false);
|
|
111
|
+
assert.deepEqual(result.assertions, []);
|
|
112
|
+
assert.deepEqual(result.context?.docs, []);
|
|
113
|
+
assert.equal(result.prompt?.text, "Write a GROQ query that filters by _type");
|
|
114
|
+
});
|
|
115
|
+
it("converts a fully-populated old task preserving all fields", () => {
|
|
116
|
+
const result = toGeneralized(fullOldTask);
|
|
117
|
+
assert.equal(result.mode, "literacy");
|
|
118
|
+
assert.equal(result.id, "mutations-create-advanced");
|
|
119
|
+
assert.equal(result.title, "Create documents with mutations API");
|
|
120
|
+
assert.equal(result.area, "mutations");
|
|
121
|
+
assert.equal(result.referenceSolution, "solutions/mutations-create-advanced.ts");
|
|
122
|
+
assert.equal(result.docCoverage, true);
|
|
123
|
+
assert.deepEqual(result.baseline, { enabled: true, rubric: "full" });
|
|
124
|
+
assert.deepEqual(result.tags, ["mutations", "advanced", "references"]);
|
|
125
|
+
assert.equal(result.status, "active");
|
|
126
|
+
assert.deepEqual(result.prompt?.vars, {
|
|
127
|
+
customHint: "Use createIfNotExists",
|
|
128
|
+
maxRetries: 3,
|
|
129
|
+
});
|
|
130
|
+
assert.equal(result.prompt?.text, "Use the mutations API to create a document with references");
|
|
131
|
+
});
|
|
132
|
+
it("does not set optional fields when absent in old task", () => {
|
|
133
|
+
const result = toGeneralized(minimalOldTask);
|
|
134
|
+
assert.equal(result.baseline, undefined);
|
|
135
|
+
assert.equal(result.tags, undefined);
|
|
136
|
+
assert.equal(result.status, undefined);
|
|
137
|
+
assert.equal(result.prompt?.vars, undefined);
|
|
138
|
+
});
|
|
139
|
+
});
|
|
140
|
+
// ---------------------------------------------------------------------------
|
|
141
|
+
// toLiteracyTask — LiteracyTaskDefinition → old TaskDefinition
|
|
142
|
+
// ---------------------------------------------------------------------------
|
|
143
|
+
describe("toLiteracyTask", () => {
|
|
144
|
+
it("converts a minimal new task to a TaskDefinition", () => {
|
|
145
|
+
const result = toLiteracyTask(minimalNewTask);
|
|
146
|
+
assert.equal(result.id, "studio-config-basic");
|
|
147
|
+
assert.equal(result.description, "Configure a Sanity Studio");
|
|
148
|
+
assert.equal(result.featureArea, "");
|
|
149
|
+
assert.equal(result.taskPrompt, "");
|
|
150
|
+
assert.deepEqual(result.canonicalDocs, []);
|
|
151
|
+
assert.equal(result.referenceSolution, "");
|
|
152
|
+
assert.equal(result.docCoverage, false);
|
|
153
|
+
assert.deepEqual(result.assertions, []);
|
|
154
|
+
});
|
|
155
|
+
it("converts a fully-populated new task preserving all mappable fields", () => {
|
|
156
|
+
const result = toLiteracyTask(fullNewTask);
|
|
157
|
+
assert.equal(result.id, "groq-projection-advanced");
|
|
158
|
+
assert.equal(result.description, "Advanced GROQ projections");
|
|
159
|
+
assert.equal(result.featureArea, "groq");
|
|
160
|
+
assert.equal(result.taskPrompt, "Write a GROQ query using projections with coalesce");
|
|
161
|
+
assert.equal(result.referenceSolution, "solutions/groq-projection-advanced.ts");
|
|
162
|
+
assert.equal(result.docCoverage, true);
|
|
163
|
+
assert.deepEqual(result.baseline, { enabled: false, rubric: "abbreviated" });
|
|
164
|
+
assert.deepEqual(result.tags, ["groq", "projections"]);
|
|
165
|
+
assert.equal(result.status, "active");
|
|
166
|
+
assert.deepEqual(result.extraVars, {
|
|
167
|
+
difficulty: "advanced",
|
|
168
|
+
topic: "projections",
|
|
169
|
+
});
|
|
170
|
+
});
|
|
171
|
+
it("uses prompt.template as fallback when prompt.text is absent", () => {
|
|
172
|
+
const task = {
|
|
173
|
+
mode: "literacy",
|
|
174
|
+
id: "template-task",
|
|
175
|
+
title: "Template-based task",
|
|
176
|
+
prompt: { template: "my-named-template" },
|
|
177
|
+
};
|
|
178
|
+
const result = toLiteracyTask(task);
|
|
179
|
+
assert.equal(result.taskPrompt, "my-named-template");
|
|
180
|
+
});
|
|
181
|
+
it("does not set optional fields when absent in new task", () => {
|
|
182
|
+
const result = toLiteracyTask(minimalNewTask);
|
|
183
|
+
assert.equal(result.baseline, undefined);
|
|
184
|
+
assert.equal(result.tags, undefined);
|
|
185
|
+
assert.equal(result.status, undefined);
|
|
186
|
+
assert.equal(result.extraVars, undefined);
|
|
187
|
+
});
|
|
188
|
+
});
|
|
189
|
+
// ---------------------------------------------------------------------------
|
|
190
|
+
// Round-trip: old → new → old (must be lossless)
|
|
191
|
+
// ---------------------------------------------------------------------------
|
|
192
|
+
describe("round-trip: toLiteracyTask(toGeneralized(oldTask))", () => {
|
|
193
|
+
it("preserves all fields of a minimal old task", () => {
|
|
194
|
+
const roundTripped = toLiteracyTask(toGeneralized(minimalOldTask));
|
|
195
|
+
assert.deepEqual(roundTripped, minimalOldTask);
|
|
196
|
+
});
|
|
197
|
+
it("preserves all fields of a fully-populated old task", () => {
|
|
198
|
+
const roundTripped = toLiteracyTask(toGeneralized(fullOldTask));
|
|
199
|
+
assert.deepEqual(roundTripped, fullOldTask);
|
|
200
|
+
});
|
|
201
|
+
});
|
|
202
|
+
// ---------------------------------------------------------------------------
|
|
203
|
+
// Round-trip: new → old → new (lossless for mappable fields)
|
|
204
|
+
// ---------------------------------------------------------------------------
|
|
205
|
+
describe("round-trip: toGeneralized(toLiteracyTask(newTask))", () => {
|
|
206
|
+
it("preserves all fields of a minimal new task", () => {
|
|
207
|
+
const roundTripped = toGeneralized(toLiteracyTask(minimalNewTask));
|
|
208
|
+
assert.equal(roundTripped.mode, "literacy");
|
|
209
|
+
assert.equal(roundTripped.id, minimalNewTask.id);
|
|
210
|
+
assert.equal(roundTripped.title, minimalNewTask.title);
|
|
211
|
+
});
|
|
212
|
+
it("preserves all mappable fields of a fully-populated new task", () => {
|
|
213
|
+
const roundTripped = toGeneralized(toLiteracyTask(fullNewTask));
|
|
214
|
+
assert.equal(roundTripped.mode, "literacy");
|
|
215
|
+
assert.equal(roundTripped.id, fullNewTask.id);
|
|
216
|
+
assert.equal(roundTripped.title, fullNewTask.title);
|
|
217
|
+
assert.equal(roundTripped.area, fullNewTask.area);
|
|
218
|
+
assert.deepEqual(roundTripped.tags, fullNewTask.tags);
|
|
219
|
+
assert.equal(roundTripped.status, fullNewTask.status);
|
|
220
|
+
assert.deepEqual(roundTripped.assertions, fullNewTask.assertions);
|
|
221
|
+
assert.equal(roundTripped.prompt?.text, fullNewTask.prompt?.text);
|
|
222
|
+
assert.deepEqual(roundTripped.prompt?.vars, fullNewTask.prompt?.vars);
|
|
223
|
+
assert.deepEqual(roundTripped.context?.docs, fullNewTask.context?.docs);
|
|
224
|
+
assert.equal(roundTripped.referenceSolution, fullNewTask.referenceSolution);
|
|
225
|
+
assert.equal(roundTripped.docCoverage, fullNewTask.docCoverage);
|
|
226
|
+
assert.deepEqual(roundTripped.baseline, fullNewTask.baseline);
|
|
227
|
+
});
|
|
228
|
+
});
|
|
229
|
+
// ---------------------------------------------------------------------------
|
|
230
|
+
// CanonicalDocRef ↔ GeneralizedDocRef mapping (all 4 variants)
|
|
231
|
+
// ---------------------------------------------------------------------------
|
|
232
|
+
describe("doc ref mapping", () => {
|
|
233
|
+
const slugRef = {
|
|
234
|
+
slug: "my-article",
|
|
235
|
+
reason: "testing slug",
|
|
236
|
+
};
|
|
237
|
+
const pathRef = {
|
|
238
|
+
path: "/docs/my-article",
|
|
239
|
+
reason: "testing path",
|
|
240
|
+
};
|
|
241
|
+
const idRef = {
|
|
242
|
+
id: "abc-123",
|
|
243
|
+
reason: "testing id",
|
|
244
|
+
slug: "annotated-slug",
|
|
245
|
+
path: "/docs/annotated",
|
|
246
|
+
};
|
|
247
|
+
const perspectiveRef = {
|
|
248
|
+
perspective: "release-v4",
|
|
249
|
+
reason: "testing perspective",
|
|
250
|
+
};
|
|
251
|
+
it("preserves slug ref through round-trip", () => {
|
|
252
|
+
const task = { ...minimalOldTask, canonicalDocs: [slugRef] };
|
|
253
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
254
|
+
assert.deepEqual(roundTripped.canonicalDocs, [slugRef]);
|
|
255
|
+
});
|
|
256
|
+
it("preserves path ref through round-trip", () => {
|
|
257
|
+
const task = { ...minimalOldTask, canonicalDocs: [pathRef] };
|
|
258
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
259
|
+
assert.deepEqual(roundTripped.canonicalDocs, [pathRef]);
|
|
260
|
+
});
|
|
261
|
+
it("preserves id ref (with optional slug/path annotations) through round-trip", () => {
|
|
262
|
+
const task = { ...minimalOldTask, canonicalDocs: [idRef] };
|
|
263
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
264
|
+
assert.deepEqual(roundTripped.canonicalDocs, [idRef]);
|
|
265
|
+
});
|
|
266
|
+
it("preserves perspective ref through round-trip", () => {
|
|
267
|
+
const task = {
|
|
268
|
+
...minimalOldTask,
|
|
269
|
+
canonicalDocs: [perspectiveRef],
|
|
270
|
+
};
|
|
271
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
272
|
+
assert.deepEqual(roundTripped.canonicalDocs, [perspectiveRef]);
|
|
273
|
+
});
|
|
274
|
+
it("preserves all 4 ref variants together through round-trip", () => {
|
|
275
|
+
const allRefs = [slugRef, pathRef, idRef, perspectiveRef];
|
|
276
|
+
const task = { ...minimalOldTask, canonicalDocs: allRefs };
|
|
277
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
278
|
+
assert.deepEqual(roundTripped.canonicalDocs, allRefs);
|
|
279
|
+
});
|
|
280
|
+
});
|
|
281
|
+
// ---------------------------------------------------------------------------
|
|
282
|
+
// Assertion type mapping
|
|
283
|
+
// ---------------------------------------------------------------------------
|
|
284
|
+
describe("assertion type mapping", () => {
|
|
285
|
+
const templatedAssertion = {
|
|
286
|
+
type: "llm-rubric",
|
|
287
|
+
template: "code-quality",
|
|
288
|
+
criteria: ["correct", "idiomatic", "secure"],
|
|
289
|
+
weight: 2,
|
|
290
|
+
};
|
|
291
|
+
const valueAssertion = {
|
|
292
|
+
type: "contains",
|
|
293
|
+
value: "createDocument",
|
|
294
|
+
};
|
|
295
|
+
const jsAssertion = {
|
|
296
|
+
type: "javascript",
|
|
297
|
+
value: "output.includes('done')",
|
|
298
|
+
weight: 1,
|
|
299
|
+
};
|
|
300
|
+
it("preserves templated assertions through old→new→old round-trip", () => {
|
|
301
|
+
const task = {
|
|
302
|
+
...minimalOldTask,
|
|
303
|
+
assertions: [templatedAssertion],
|
|
304
|
+
};
|
|
305
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
306
|
+
assert.deepEqual(roundTripped.assertions, [templatedAssertion]);
|
|
307
|
+
});
|
|
308
|
+
it("preserves value assertions through old→new→old round-trip", () => {
|
|
309
|
+
const task = {
|
|
310
|
+
...minimalOldTask,
|
|
311
|
+
assertions: [valueAssertion],
|
|
312
|
+
};
|
|
313
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
314
|
+
assert.deepEqual(roundTripped.assertions, [valueAssertion]);
|
|
315
|
+
});
|
|
316
|
+
it("preserves mixed assertion types through old→new→old round-trip", () => {
|
|
317
|
+
const mixed = [templatedAssertion, valueAssertion, jsAssertion];
|
|
318
|
+
const task = { ...minimalOldTask, assertions: mixed };
|
|
319
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
320
|
+
assert.deepEqual(roundTripped.assertions, mixed);
|
|
321
|
+
});
|
|
322
|
+
it("preserves assertions through new→old→new round-trip", () => {
|
|
323
|
+
const genAssertions = [
|
|
324
|
+
{
|
|
325
|
+
type: "llm-rubric",
|
|
326
|
+
template: "completeness",
|
|
327
|
+
criteria: ["thorough"],
|
|
328
|
+
weight: 1,
|
|
329
|
+
},
|
|
330
|
+
{ type: "regex", value: "^import.*sanity" },
|
|
331
|
+
];
|
|
332
|
+
const task = {
|
|
333
|
+
...minimalNewTask,
|
|
334
|
+
assertions: genAssertions,
|
|
335
|
+
};
|
|
336
|
+
const roundTripped = toGeneralized(toLiteracyTask(task));
|
|
337
|
+
assert.deepEqual(roundTripped.assertions, genAssertions);
|
|
338
|
+
});
|
|
339
|
+
});
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* tool-loop-openai.test.ts — Tests for the OpenAI MCP tool loop.
|
|
3
|
+
*
|
|
4
|
+
* Tests both API variants (Chat Completions and Responses) with mocked
|
|
5
|
+
* fetch to verify tool calling, error handling, token tracking, and
|
|
6
|
+
* round exhaustion.
|
|
7
|
+
*
|
|
8
|
+
* Run: npx tsx --test src/pipeline/compiler/__tests__/tool-loop-openai.test.ts
|
|
9
|
+
*/
|
|
10
|
+
export {};
|