@sanity/ailf 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +28 -23
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
- package/dist/_vendor/ailf-core/config-helpers.js +29 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
- package/dist/_vendor/ailf-core/examples/index.js +208 -114
- package/dist/_vendor/ailf-core/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/index.js +1 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
- package/dist/_vendor/ailf-tasks/cli.js +61 -0
- package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
- package/dist/_vendor/ailf-tasks/index.js +16 -0
- package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
- package/dist/_vendor/ailf-tasks/parser.js +73 -0
- package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
- package/dist/_vendor/ailf-tasks/schemas.js +180 -0
- package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
- package/dist/_vendor/ailf-tasks/validation.js +162 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +6 -1
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
- package/dist/adapters/task-sources/index.d.ts +1 -2
- package/dist/adapters/task-sources/index.js +1 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
- package/dist/adapters/task-sources/repo-schemas.js +2 -2
- package/dist/adapters/task-sources/repo-task-source.js +1 -1
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
- package/dist/adapters/task-sources/task-file-loader.js +20 -6
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +2 -3
- package/dist/commands/init.js +56 -170
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/composition-root.d.ts +2 -3
- package/dist/composition-root.js +27 -14
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +30 -16
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +50 -15
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +51 -31
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
- package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
- package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +15 -8
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +15 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/dist/pipeline/mirror-repo-tasks.js +1 -1
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +67 -29
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +24 -24
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* task-bridge.test.ts — Unit tests for the TaskDefinition ↔ LiteracyTaskDefinition bridge.
|
|
3
|
+
*
|
|
4
|
+
* Covers round-trip fidelity, edge cases (missing optionals, all optionals),
|
|
5
|
+
* assertion type mapping, and all four CanonicalDocRef / GeneralizedDocRef variants.
|
|
6
|
+
*
|
|
7
|
+
* Run: npx tsx --test src/pipeline/compiler/__tests__/task-bridge.test.ts
|
|
8
|
+
*/
|
|
9
|
+
import assert from "node:assert/strict";
|
|
10
|
+
import { describe, it } from "node:test";
|
|
11
|
+
import { toGeneralized, toLiteracyTask } from "../task-bridge.js";
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Fixtures
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
/**
 * Minimal old-style task fixture — only required fields, no optionals.
 * canonicalDocs and assertions are empty arrays; baseline, tags, status and
 * extraVars are left out entirely so tests can check that absent optionals
 * stay absent after conversion.
 */
|
|
16
|
+
const minimalOldTask = {
|
|
17
|
+
id: "groq-filter-basic",
|
|
18
|
+
description: "Filter documents with GROQ",
|
|
19
|
+
featureArea: "groq",
|
|
20
|
+
taskPrompt: "Write a GROQ query that filters by _type",
|
|
21
|
+
canonicalDocs: [],
|
|
22
|
+
referenceSolution: "solutions/groq-filter-basic.ts",
|
|
23
|
+
docCoverage: false,
|
|
24
|
+
assertions: [],
|
|
25
|
+
};
|
|
26
|
+
/**
 * Fully-populated old-style task fixture — every optional filled.
 * canonicalDocs carries four entries keyed by slug, by path, by id (with slug
 * and path alongside), and by perspective. assertions mixes the "llm-rubric",
 * "contains" and "javascript" types, with and without an explicit weight.
 */
|
|
27
|
+
const fullOldTask = {
|
|
28
|
+
id: "mutations-create-advanced",
|
|
29
|
+
description: "Create documents with mutations API",
|
|
30
|
+
featureArea: "mutations",
|
|
31
|
+
taskPrompt: "Use the mutations API to create a document with references",
|
|
32
|
+
canonicalDocs: [
|
|
33
|
+
{ slug: "mutations-overview", reason: "Primary mutations guide" },
|
|
34
|
+
{ path: "/docs/mutations/create", reason: "Create-specific docs" },
|
|
35
|
+
{
|
|
36
|
+
id: "doc-123",
|
|
37
|
+
reason: "Imported draft",
|
|
38
|
+
slug: "draft-slug",
|
|
39
|
+
path: "/docs/draft",
|
|
40
|
+
},
|
|
41
|
+
{ perspective: "release-v3", reason: "V3 release content" },
|
|
42
|
+
],
|
|
43
|
+
referenceSolution: "solutions/mutations-create-advanced.ts",
|
|
44
|
+
docCoverage: true,
|
|
45
|
+
assertions: [
|
|
46
|
+
{
|
|
47
|
+
type: "llm-rubric",
|
|
48
|
+
template: "code-quality",
|
|
49
|
+
criteria: ["correct", "idiomatic"],
|
|
50
|
+
weight: 2,
|
|
51
|
+
},
|
|
52
|
+
{ type: "contains", value: "createIfNotExists" },
|
|
53
|
+
{ type: "javascript", value: "output.includes('mutation')", weight: 1 },
|
|
54
|
+
],
|
|
55
|
+
baseline: { enabled: true, rubric: "full" },
|
|
56
|
+
tags: ["mutations", "advanced", "references"],
|
|
57
|
+
status: "active",
|
|
58
|
+
extraVars: { customHint: "Use createIfNotExists", maxRetries: 3 },
|
|
59
|
+
};
|
|
60
|
+
/**
 * Minimal new-style literacy task fixture — only mode, id and title.
 * area, prompt, context, referenceSolution, docCoverage and assertions are
 * all omitted, so the toLiteracyTask test can check the values it expects
 * for them (empty strings, empty arrays, false).
 */
|
|
61
|
+
const minimalNewTask = {
|
|
62
|
+
mode: "literacy",
|
|
63
|
+
id: "studio-config-basic",
|
|
64
|
+
title: "Configure a Sanity Studio",
|
|
65
|
+
};
|
|
66
|
+
/**
 * Fully-populated new-style literacy task fixture (only fields that
 * round-trip through the old type).
 * context.docs carries four entries keyed by slug, path, id and perspective;
 * prompt supplies both text and vars.
 */
|
|
67
|
+
const fullNewTask = {
|
|
68
|
+
mode: "literacy",
|
|
69
|
+
id: "groq-projection-advanced",
|
|
70
|
+
title: "Advanced GROQ projections",
|
|
71
|
+
area: "groq",
|
|
72
|
+
tags: ["groq", "projections"],
|
|
73
|
+
status: "active",
|
|
74
|
+
assertions: [
|
|
75
|
+
{
|
|
76
|
+
type: "llm-rubric",
|
|
77
|
+
template: "completeness",
|
|
78
|
+
criteria: ["covers edge cases"],
|
|
79
|
+
weight: 3,
|
|
80
|
+
},
|
|
81
|
+
{ type: "contains", value: "coalesce" },
|
|
82
|
+
],
|
|
83
|
+
prompt: {
|
|
84
|
+
text: "Write a GROQ query using projections with coalesce",
|
|
85
|
+
vars: { difficulty: "advanced", topic: "projections" },
|
|
86
|
+
},
|
|
87
|
+
context: {
|
|
88
|
+
docs: [
|
|
89
|
+
{ slug: "groq-projections", reason: "Projection docs" },
|
|
90
|
+
{ path: "/docs/groq/projections", reason: "Path-based ref" },
|
|
91
|
+
{ id: "groq-doc-456", reason: "By ID" },
|
|
92
|
+
{ perspective: "release-groq-v2", reason: "GROQ v2 release" },
|
|
93
|
+
],
|
|
94
|
+
},
|
|
95
|
+
referenceSolution: "solutions/groq-projection-advanced.ts",
|
|
96
|
+
docCoverage: true,
|
|
97
|
+
baseline: { enabled: false, rubric: "abbreviated" },
|
|
98
|
+
};
|
|
99
|
+
// ---------------------------------------------------------------------------
|
|
100
|
+
// toGeneralized — old TaskDefinition → LiteracyTaskDefinition
|
|
101
|
+
// ---------------------------------------------------------------------------
|
|
102
|
+
/*
 * Suite for toGeneralized: feeds old-style task fixtures through the bridge
 * and checks the fields of the returned literacy-mode task.
 */
describe("toGeneralized", () => {
|
|
103
|
+
// Expects description → title, featureArea → area, taskPrompt → prompt.text, and an empty context.docs for the empty canonicalDocs.
it("converts a minimal old task to a LiteracyTaskDefinition", () => {
|
|
104
|
+
const result = toGeneralized(minimalOldTask);
|
|
105
|
+
assert.equal(result.mode, "literacy");
|
|
106
|
+
assert.equal(result.id, "groq-filter-basic");
|
|
107
|
+
assert.equal(result.title, "Filter documents with GROQ");
|
|
108
|
+
assert.equal(result.area, "groq");
|
|
109
|
+
assert.equal(result.referenceSolution, "solutions/groq-filter-basic.ts");
|
|
110
|
+
assert.equal(result.docCoverage, false);
|
|
111
|
+
assert.deepEqual(result.assertions, []);
|
|
112
|
+
assert.deepEqual(result.context?.docs, []);
|
|
113
|
+
assert.equal(result.prompt?.text, "Write a GROQ query that filters by _type");
|
|
114
|
+
});
|
|
115
|
+
// Expects baseline, tags and status to carry over unchanged and the old extraVars values to appear as prompt.vars.
it("converts a fully-populated old task preserving all fields", () => {
|
|
116
|
+
const result = toGeneralized(fullOldTask);
|
|
117
|
+
assert.equal(result.mode, "literacy");
|
|
118
|
+
assert.equal(result.id, "mutations-create-advanced");
|
|
119
|
+
assert.equal(result.title, "Create documents with mutations API");
|
|
120
|
+
assert.equal(result.area, "mutations");
|
|
121
|
+
assert.equal(result.referenceSolution, "solutions/mutations-create-advanced.ts");
|
|
122
|
+
assert.equal(result.docCoverage, true);
|
|
123
|
+
assert.deepEqual(result.baseline, { enabled: true, rubric: "full" });
|
|
124
|
+
assert.deepEqual(result.tags, ["mutations", "advanced", "references"]);
|
|
125
|
+
assert.equal(result.status, "active");
|
|
126
|
+
assert.deepEqual(result.prompt?.vars, {
|
|
127
|
+
customHint: "Use createIfNotExists",
|
|
128
|
+
maxRetries: 3,
|
|
129
|
+
});
|
|
130
|
+
assert.equal(result.prompt?.text, "Use the mutations API to create a document with references");
|
|
131
|
+
});
|
|
132
|
+
// Optionals missing from the input must come back as undefined, not as defaulted values.
it("does not set optional fields when absent in old task", () => {
|
|
133
|
+
const result = toGeneralized(minimalOldTask);
|
|
134
|
+
assert.equal(result.baseline, undefined);
|
|
135
|
+
assert.equal(result.tags, undefined);
|
|
136
|
+
assert.equal(result.status, undefined);
|
|
137
|
+
assert.equal(result.prompt?.vars, undefined);
|
|
138
|
+
});
|
|
139
|
+
});
|
|
140
|
+
// ---------------------------------------------------------------------------
|
|
141
|
+
// toLiteracyTask — LiteracyTaskDefinition → old TaskDefinition
|
|
142
|
+
// ---------------------------------------------------------------------------
|
|
143
|
+
describe("toLiteracyTask", () => {
|
|
144
|
+
it("converts a minimal new task to a TaskDefinition", () => {
|
|
145
|
+
const result = toLiteracyTask(minimalNewTask);
|
|
146
|
+
assert.equal(result.id, "studio-config-basic");
|
|
147
|
+
assert.equal(result.description, "Configure a Sanity Studio");
|
|
148
|
+
assert.equal(result.featureArea, "");
|
|
149
|
+
assert.equal(result.taskPrompt, "");
|
|
150
|
+
assert.deepEqual(result.canonicalDocs, []);
|
|
151
|
+
assert.equal(result.referenceSolution, "");
|
|
152
|
+
assert.equal(result.docCoverage, false);
|
|
153
|
+
assert.deepEqual(result.assertions, []);
|
|
154
|
+
});
|
|
155
|
+
it("converts a fully-populated new task preserving all mappable fields", () => {
|
|
156
|
+
const result = toLiteracyTask(fullNewTask);
|
|
157
|
+
assert.equal(result.id, "groq-projection-advanced");
|
|
158
|
+
assert.equal(result.description, "Advanced GROQ projections");
|
|
159
|
+
assert.equal(result.featureArea, "groq");
|
|
160
|
+
assert.equal(result.taskPrompt, "Write a GROQ query using projections with coalesce");
|
|
161
|
+
assert.equal(result.referenceSolution, "solutions/groq-projection-advanced.ts");
|
|
162
|
+
assert.equal(result.docCoverage, true);
|
|
163
|
+
assert.deepEqual(result.baseline, { enabled: false, rubric: "abbreviated" });
|
|
164
|
+
assert.deepEqual(result.tags, ["groq", "projections"]);
|
|
165
|
+
assert.equal(result.status, "active");
|
|
166
|
+
assert.deepEqual(result.extraVars, {
|
|
167
|
+
difficulty: "advanced",
|
|
168
|
+
topic: "projections",
|
|
169
|
+
});
|
|
170
|
+
});
|
|
171
|
+
it("uses prompt.template as fallback when prompt.text is absent", () => {
|
|
172
|
+
const task = {
|
|
173
|
+
mode: "literacy",
|
|
174
|
+
id: "template-task",
|
|
175
|
+
title: "Template-based task",
|
|
176
|
+
prompt: { template: "my-named-template" },
|
|
177
|
+
};
|
|
178
|
+
const result = toLiteracyTask(task);
|
|
179
|
+
assert.equal(result.taskPrompt, "my-named-template");
|
|
180
|
+
});
|
|
181
|
+
it("does not set optional fields when absent in new task", () => {
|
|
182
|
+
const result = toLiteracyTask(minimalNewTask);
|
|
183
|
+
assert.equal(result.baseline, undefined);
|
|
184
|
+
assert.equal(result.tags, undefined);
|
|
185
|
+
assert.equal(result.status, undefined);
|
|
186
|
+
assert.equal(result.extraVars, undefined);
|
|
187
|
+
});
|
|
188
|
+
});
|
|
189
|
+
// ---------------------------------------------------------------------------
|
|
190
|
+
// Round-trip: old → new → old (must be lossless)
|
|
191
|
+
// ---------------------------------------------------------------------------
|
|
192
|
+
describe("round-trip: toLiteracyTask(toGeneralized(oldTask))", () => {
|
|
193
|
+
it("preserves all fields of a minimal old task", () => {
|
|
194
|
+
const roundTripped = toLiteracyTask(toGeneralized(minimalOldTask));
|
|
195
|
+
assert.deepEqual(roundTripped, minimalOldTask);
|
|
196
|
+
});
|
|
197
|
+
it("preserves all fields of a fully-populated old task", () => {
|
|
198
|
+
const roundTripped = toLiteracyTask(toGeneralized(fullOldTask));
|
|
199
|
+
assert.deepEqual(roundTripped, fullOldTask);
|
|
200
|
+
});
|
|
201
|
+
});
|
|
202
|
+
// ---------------------------------------------------------------------------
|
|
203
|
+
// Round-trip: new → old → new (lossless for mappable fields)
|
|
204
|
+
// ---------------------------------------------------------------------------
|
|
205
|
+
describe("round-trip: toGeneralized(toLiteracyTask(newTask))", () => {
|
|
206
|
+
it("preserves all fields of a minimal new task", () => {
|
|
207
|
+
const roundTripped = toGeneralized(toLiteracyTask(minimalNewTask));
|
|
208
|
+
assert.equal(roundTripped.mode, "literacy");
|
|
209
|
+
assert.equal(roundTripped.id, minimalNewTask.id);
|
|
210
|
+
assert.equal(roundTripped.title, minimalNewTask.title);
|
|
211
|
+
});
|
|
212
|
+
it("preserves all mappable fields of a fully-populated new task", () => {
|
|
213
|
+
const roundTripped = toGeneralized(toLiteracyTask(fullNewTask));
|
|
214
|
+
assert.equal(roundTripped.mode, "literacy");
|
|
215
|
+
assert.equal(roundTripped.id, fullNewTask.id);
|
|
216
|
+
assert.equal(roundTripped.title, fullNewTask.title);
|
|
217
|
+
assert.equal(roundTripped.area, fullNewTask.area);
|
|
218
|
+
assert.deepEqual(roundTripped.tags, fullNewTask.tags);
|
|
219
|
+
assert.equal(roundTripped.status, fullNewTask.status);
|
|
220
|
+
assert.deepEqual(roundTripped.assertions, fullNewTask.assertions);
|
|
221
|
+
assert.equal(roundTripped.prompt?.text, fullNewTask.prompt?.text);
|
|
222
|
+
assert.deepEqual(roundTripped.prompt?.vars, fullNewTask.prompt?.vars);
|
|
223
|
+
assert.deepEqual(roundTripped.context?.docs, fullNewTask.context?.docs);
|
|
224
|
+
assert.equal(roundTripped.referenceSolution, fullNewTask.referenceSolution);
|
|
225
|
+
assert.equal(roundTripped.docCoverage, fullNewTask.docCoverage);
|
|
226
|
+
assert.deepEqual(roundTripped.baseline, fullNewTask.baseline);
|
|
227
|
+
});
|
|
228
|
+
});
|
|
229
|
+
// ---------------------------------------------------------------------------
|
|
230
|
+
// CanonicalDocRef ↔ GeneralizedDocRef mapping (all 4 variants)
|
|
231
|
+
// ---------------------------------------------------------------------------
|
|
232
|
+
describe("doc ref mapping", () => {
|
|
233
|
+
const slugRef = {
|
|
234
|
+
slug: "my-article",
|
|
235
|
+
reason: "testing slug",
|
|
236
|
+
};
|
|
237
|
+
const pathRef = {
|
|
238
|
+
path: "/docs/my-article",
|
|
239
|
+
reason: "testing path",
|
|
240
|
+
};
|
|
241
|
+
const idRef = {
|
|
242
|
+
id: "abc-123",
|
|
243
|
+
reason: "testing id",
|
|
244
|
+
slug: "annotated-slug",
|
|
245
|
+
path: "/docs/annotated",
|
|
246
|
+
};
|
|
247
|
+
const perspectiveRef = {
|
|
248
|
+
perspective: "release-v4",
|
|
249
|
+
reason: "testing perspective",
|
|
250
|
+
};
|
|
251
|
+
it("preserves slug ref through round-trip", () => {
|
|
252
|
+
const task = { ...minimalOldTask, canonicalDocs: [slugRef] };
|
|
253
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
254
|
+
assert.deepEqual(roundTripped.canonicalDocs, [slugRef]);
|
|
255
|
+
});
|
|
256
|
+
it("preserves path ref through round-trip", () => {
|
|
257
|
+
const task = { ...minimalOldTask, canonicalDocs: [pathRef] };
|
|
258
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
259
|
+
assert.deepEqual(roundTripped.canonicalDocs, [pathRef]);
|
|
260
|
+
});
|
|
261
|
+
it("preserves id ref (with optional slug/path annotations) through round-trip", () => {
|
|
262
|
+
const task = { ...minimalOldTask, canonicalDocs: [idRef] };
|
|
263
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
264
|
+
assert.deepEqual(roundTripped.canonicalDocs, [idRef]);
|
|
265
|
+
});
|
|
266
|
+
it("preserves perspective ref through round-trip", () => {
|
|
267
|
+
const task = {
|
|
268
|
+
...minimalOldTask,
|
|
269
|
+
canonicalDocs: [perspectiveRef],
|
|
270
|
+
};
|
|
271
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
272
|
+
assert.deepEqual(roundTripped.canonicalDocs, [perspectiveRef]);
|
|
273
|
+
});
|
|
274
|
+
it("preserves all 4 ref variants together through round-trip", () => {
|
|
275
|
+
const allRefs = [slugRef, pathRef, idRef, perspectiveRef];
|
|
276
|
+
const task = { ...minimalOldTask, canonicalDocs: allRefs };
|
|
277
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
278
|
+
assert.deepEqual(roundTripped.canonicalDocs, allRefs);
|
|
279
|
+
});
|
|
280
|
+
});
|
|
281
|
+
// ---------------------------------------------------------------------------
|
|
282
|
+
// Assertion type mapping
|
|
283
|
+
// ---------------------------------------------------------------------------
|
|
284
|
+
describe("assertion type mapping", () => {
|
|
285
|
+
const templatedAssertion = {
|
|
286
|
+
type: "llm-rubric",
|
|
287
|
+
template: "code-quality",
|
|
288
|
+
criteria: ["correct", "idiomatic", "secure"],
|
|
289
|
+
weight: 2,
|
|
290
|
+
};
|
|
291
|
+
const valueAssertion = {
|
|
292
|
+
type: "contains",
|
|
293
|
+
value: "createDocument",
|
|
294
|
+
};
|
|
295
|
+
const jsAssertion = {
|
|
296
|
+
type: "javascript",
|
|
297
|
+
value: "output.includes('done')",
|
|
298
|
+
weight: 1,
|
|
299
|
+
};
|
|
300
|
+
it("preserves templated assertions through old→new→old round-trip", () => {
|
|
301
|
+
const task = {
|
|
302
|
+
...minimalOldTask,
|
|
303
|
+
assertions: [templatedAssertion],
|
|
304
|
+
};
|
|
305
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
306
|
+
assert.deepEqual(roundTripped.assertions, [templatedAssertion]);
|
|
307
|
+
});
|
|
308
|
+
it("preserves value assertions through old→new→old round-trip", () => {
|
|
309
|
+
const task = {
|
|
310
|
+
...minimalOldTask,
|
|
311
|
+
assertions: [valueAssertion],
|
|
312
|
+
};
|
|
313
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
314
|
+
assert.deepEqual(roundTripped.assertions, [valueAssertion]);
|
|
315
|
+
});
|
|
316
|
+
it("preserves mixed assertion types through old→new→old round-trip", () => {
|
|
317
|
+
const mixed = [templatedAssertion, valueAssertion, jsAssertion];
|
|
318
|
+
const task = { ...minimalOldTask, assertions: mixed };
|
|
319
|
+
const roundTripped = toLiteracyTask(toGeneralized(task));
|
|
320
|
+
assert.deepEqual(roundTripped.assertions, mixed);
|
|
321
|
+
});
|
|
322
|
+
it("preserves assertions through new→old→new round-trip", () => {
|
|
323
|
+
const genAssertions = [
|
|
324
|
+
{
|
|
325
|
+
type: "llm-rubric",
|
|
326
|
+
template: "completeness",
|
|
327
|
+
criteria: ["thorough"],
|
|
328
|
+
weight: 1,
|
|
329
|
+
},
|
|
330
|
+
{ type: "regex", value: "^import.*sanity" },
|
|
331
|
+
];
|
|
332
|
+
const task = {
|
|
333
|
+
...minimalNewTask,
|
|
334
|
+
assertions: genAssertions,
|
|
335
|
+
};
|
|
336
|
+
const roundTripped = toGeneralized(toLiteracyTask(task));
|
|
337
|
+
assert.deepEqual(roundTripped.assertions, genAssertions);
|
|
338
|
+
});
|
|
339
|
+
});
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* tool-loop-openai.test.ts — Tests for the OpenAI MCP tool loop.
|
|
3
|
+
*
|
|
4
|
+
* Tests both API variants (Chat Completions and Responses) with mocked
|
|
5
|
+
* fetch to verify tool calling, error handling, token tracking, and
|
|
6
|
+
* round exhaustion.
|
|
7
|
+
*
|
|
8
|
+
* Run: npx tsx --test src/pipeline/compiler/__tests__/tool-loop-openai.test.ts
|
|
9
|
+
*/
|
|
10
|
+
export {};
|