@sanity/ailf 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +28 -23
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
- package/dist/_vendor/ailf-core/config-helpers.js +29 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
- package/dist/_vendor/ailf-core/examples/index.js +208 -114
- package/dist/_vendor/ailf-core/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/index.js +1 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
- package/dist/_vendor/ailf-tasks/cli.js +61 -0
- package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
- package/dist/_vendor/ailf-tasks/index.js +16 -0
- package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
- package/dist/_vendor/ailf-tasks/parser.js +73 -0
- package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
- package/dist/_vendor/ailf-tasks/schemas.js +180 -0
- package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
- package/dist/_vendor/ailf-tasks/validation.js +162 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +6 -1
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
- package/dist/adapters/task-sources/index.d.ts +1 -2
- package/dist/adapters/task-sources/index.js +1 -2
- package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
- package/dist/adapters/task-sources/repo-schemas.js +2 -2
- package/dist/adapters/task-sources/repo-task-source.js +1 -1
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
- package/dist/adapters/task-sources/task-file-loader.js +20 -6
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +2 -3
- package/dist/commands/init.js +56 -170
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/composition-root.d.ts +2 -3
- package/dist/composition-root.js +27 -14
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +30 -16
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +50 -15
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +52 -32
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/checks.d.ts +8 -3
- package/dist/pipeline/checks.js +23 -3
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/literacy-bridge.js +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
- package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
- package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
- package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +15 -8
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +15 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/dist/pipeline/mirror-repo-tasks.js +1 -1
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +67 -29
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +25 -25
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reference Solution: Custom Block Types in Portable Text
|
|
3
|
+
*
|
|
4
|
+
* Demonstrates:
|
|
5
|
+
* - Defining custom block types with defineArrayMember
|
|
6
|
+
* - Adding "code" and "callout" blocks to a PT field
|
|
7
|
+
* - Rendering custom blocks with @portabletext/react
|
|
8
|
+
* - TypeScript types for custom block shapes
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
// === Part 1: Schema Definition (schemas/post.ts) ===
|
|
12
|
+
|
|
13
|
+
import {
|
|
14
|
+
defineType,
|
|
15
|
+
defineField,
|
|
16
|
+
defineArrayMember,
|
|
17
|
+
} from "sanity"
|
|
18
|
+
|
|
19
|
+
/**
 * Sanity schema for the "post" document type.
 *
 * Fields:
 * - `title`: plain string
 * - `body`: Portable Text array with three member types:
 *   - the standard `block` type, configured with Normal/H2/H3/Quote styles
 *     and Bold/Italic/Code decorators
 *   - a custom `code` object (language, code, filename)
 *   - a custom `callout` object (tone, text)
 */
export const postType = defineType({
  name: "post",
  title: "Post",
  type: "document",
  fields: [
    defineField({
      name: "title",
      title: "Title",
      type: "string",
    }),
    defineField({
      name: "body",
      title: "Body",
      type: "array",
      of: [
        // Standard block (paragraphs, headings, lists)
        defineArrayMember({
          type: "block",
          styles: [
            { title: "Normal", value: "normal" },
            { title: "H2", value: "h2" },
            { title: "H3", value: "h3" },
            { title: "Quote", value: "blockquote" },
          ],
          marks: {
            decorators: [
              { title: "Bold", value: "strong" },
              { title: "Italic", value: "em" },
              { title: "Code", value: "code" },
            ],
          },
        }),

        // Custom: Code block with language selection
        defineArrayMember({
          name: "code",
          title: "Code Block",
          type: "object",
          fields: [
            defineField({
              name: "language",
              title: "Language",
              type: "string",
              options: {
                list: [
                  { title: "JavaScript", value: "javascript" },
                  { title: "TypeScript", value: "typescript" },
                  { title: "HTML", value: "html" },
                  { title: "CSS", value: "css" },
                  { title: "Shell", value: "bash" },
                  { title: "JSON", value: "json" },
                ],
              },
            }),
            defineField({
              name: "code",
              title: "Code",
              type: "text",
              rows: 10,
            }),
            defineField({
              name: "filename",
              title: "Filename",
              type: "string",
            }),
          ],
          preview: {
            select: { language: "language", code: "code" },
            prepare({ language, code }) {
              // Show at most the first 50 characters of the code, and only
              // add an ellipsis when something was actually cut off.
              const maxLength = 50
              const snippet = code || ""
              const isTruncated = snippet.length > maxLength
              return {
                title: `Code: ${language || "plain"}`,
                subtitle: isTruncated
                  ? `${snippet.slice(0, maxLength)}…`
                  : snippet,
              }
            },
          },
        }),

        // Custom: Callout block with tone
        defineArrayMember({
          name: "callout",
          title: "Callout",
          type: "object",
          fields: [
            defineField({
              name: "tone",
              title: "Tone",
              type: "string",
              options: {
                list: [
                  { title: "Info", value: "info" },
                  { title: "Warning", value: "warning" },
                  { title: "Error", value: "error" },
                  { title: "Tip", value: "tip" },
                ],
                layout: "radio",
              },
              initialValue: "info",
            }),
            defineField({
              name: "text",
              title: "Text",
              type: "text",
              rows: 3,
            }),
          ],
          preview: {
            select: { tone: "tone", text: "text" },
            prepare({ tone, text }) {
              const icons = { info: "ℹ️", warning: "⚠️", error: "🚨", tip: "💡" }
              // Resolve the default tone before looking up the icon so that a
              // callout without a tone gets the "info" icon to match its label.
              const resolvedTone = tone || "info"
              const icon = icons[resolvedTone as keyof typeof icons]
              return {
                // Tones without an icon render without a leading space
                title: icon
                  ? `${icon} ${resolvedTone} callout`
                  : `${resolvedTone} callout`,
                subtitle: text,
              }
            },
          },
        }),
      ],
    }),
  ],
})
|
|
139
|
+
|
|
140
|
+
// === Part 2: Frontend Rendering (components/PortableTextBody.tsx) ===
|
|
141
|
+
|
|
142
|
+
// import { PortableText, type PortableTextComponents } from "@portabletext/react"
|
|
143
|
+
|
|
144
|
+
/** Value shape of a custom "code" block as passed to its renderer. */
interface CodeBlockValue {
  /** Discriminator identifying the block type */
  _type: "code"

  /** Language identifier selected for the snippet, if any */
  language?: string

  /** The source text of the snippet */
  code: string

  /** Optional filename associated with the snippet */
  filename?: string
}
|
|
150
|
+
|
|
151
|
+
/** Value shape of a custom "callout" block as passed to its renderer. */
interface CalloutBlockValue {
  /** Discriminator identifying the block type */
  _type: "callout"

  /** Tone of the callout; one of the four supported values */
  tone: "info" | "warning" | "error" | "tip"

  /** Plain-text body of the callout */
  text: string
}
|
|
156
|
+
|
|
157
|
+
/**
 * Renders a custom "code" block as a <figure>.
 *
 * When a filename is present it is shown in a <figcaption>; the code itself
 * is placed inside <pre><code>, and the language is exposed through the
 * `data-language` attribute on the <pre>.
 */
function CodeBlock({ value }: { value: CodeBlockValue }) {
  const { filename, language, code } = value
  const caption = filename && <figcaption>{filename}</figcaption>

  return (
    <figure>
      {caption}
      <pre data-language={language}>
        <code>{code}</code>
      </pre>
    </figure>
  )
}
|
|
169
|
+
|
|
170
|
+
/** Background and border colors for each callout tone, keyed by tone name. */
const toneStyles: Record<string, { background: string; border: string }> = {
  info: {
    background: "#e8f4fd",
    border: "#2196f3",
  },
  warning: {
    background: "#fff8e1",
    border: "#ff9800",
  },
  error: {
    background: "#fde8e8",
    border: "#f44336",
  },
  tip: {
    background: "#e8f5e9",
    border: "#4caf50",
  },
}
|
|
176
|
+
|
|
177
|
+
/**
 * Renders a custom "callout" block as an <aside role="note"> whose left
 * border and background are colored according to the callout's tone.
 */
function CalloutBlock({ value }: { value: CalloutBlockValue }) {
  const { tone, text } = value

  // A tone with no entry in toneStyles falls back to the "info" colors
  const palette = toneStyles[tone] || toneStyles.info

  const asideStyle = {
    padding: "1rem",
    borderLeft: `4px solid ${palette.border}`,
    background: palette.background,
    margin: "1.5rem 0",
  }

  return (
    <aside role="note" style={asideStyle}>
      <p>{text}</p>
    </aside>
  )
}
|
|
194
|
+
|
|
195
|
+
/**
 * Component map for <PortableText>: associates each custom block `_type`
 * ("code", "callout") with the React component that renders it.
 */
export const components = {
  types: { code: CodeBlock, callout: CalloutBlock },
}
|
|
202
|
+
|
|
203
|
+
// Usage:
|
|
204
|
+
// <PortableText value={post.body} components={components} />
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reference Solution: Portable Text Rendering in React
|
|
3
|
+
*
|
|
4
|
+
* Demonstrates:
|
|
5
|
+
* - Setting up @portabletext/react with custom components
|
|
6
|
+
* - Handling image blocks with @sanity/image-url
|
|
7
|
+
* - Custom marks for links (internal + external)
|
|
8
|
+
* - Code block rendering with language metadata
|
|
9
|
+
* - TypeScript types for component props
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { PortableText, type PortableTextComponents } from "@portabletext/react"
|
|
13
|
+
import imageUrlBuilder from "@sanity/image-url"
|
|
14
|
+
import { createClient } from "@sanity/client"
|
|
15
|
+
|
|
16
|
+
// === Sanity Client & Image Builder ===
|
|
17
|
+
|
|
18
|
+
// Connection settings — "your-project-id" is a placeholder to replace with a real project ID
const sanityConfig = {
  projectId: "your-project-id",
  dataset: "production",
  apiVersion: "2024-01-01",
  useCdn: true,
}

const client = createClient(sanityConfig)

// Image URL builder bound to the client above; shared by every urlFor() call
const builder = imageUrlBuilder(client)
|
|
26
|
+
|
|
27
|
+
/**
 * Starts an image URL builder chain for the given Sanity image.
 * Callers append transformations (e.g. `.width()`) and finish with `.url()`.
 */
function urlFor(source: SanityImageSource) {
  const imageBuilder = builder.image(source)
  return imageBuilder
}
|
|
30
|
+
|
|
31
|
+
// === Types ===
|
|
32
|
+
|
|
33
|
+
/** Sanity image field: an asset reference plus optional hotspot and crop data. */
interface SanityImageSource {
  _type: "image"
  asset: { _ref: string; _type: "reference" }
  hotspot?: {
    x: number
    y: number
    height: number
    width: number
  }
  crop?: {
    top: number
    bottom: number
    left: number
    right: number
  }
}

/** Image block inside Portable Text: the image field plus optional alt text and caption. */
interface ImageBlockValue extends SanityImageSource {
  alt?: string
  caption?: string
}
|
|
51
|
+
|
|
52
|
+
/** Code block: the source text and, optionally, the language it is written in. */
type CodeBlockValue = {
  _type: "code"
  language?: string
  code: string
}

/** External link annotation; `blank` requests opening the link in a new tab. */
type LinkMarkValue = {
  _type: "link"
  href: string
  blank?: boolean
}

/** Internal link annotation; the optional `slug` is what the renderer turns into a local route. */
type InternalLinkMarkValue = {
  _type: "internalLink"
  reference: { _ref: string; _type: "reference" }
  slug?: { current: string }
}

/** Props of PortableTextBody: the Portable Text array and an optional wrapper class name. */
type PortableTextBodyProps = {
  value: unknown[]
  className?: string
}
|
|
74
|
+
|
|
75
|
+
// === Custom Components ===
|
|
76
|
+
|
|
77
|
+
// Image block — hands the value (including any hotspot/crop) to the URL builder
// and renders the resulting 800px-wide image inside a <figure>.
// Renders nothing when the block has no asset.
function ImageBlock({ value }: { value: ImageBlockValue }) {
  if (!value?.asset) {
    return null
  }

  const { alt, caption } = value
  const src = urlFor(value).width(800).auto("format").url()
  const imgStyle = { maxWidth: "100%", height: "auto" }

  return (
    <figure>
      <img src={src} alt={alt || ""} loading="lazy" style={imgStyle} />
      {caption && <figcaption>{caption}</figcaption>}
    </figure>
  )
}
|
|
98
|
+
|
|
99
|
+
// Code block — a <pre> that carries the language as a data attribute and wraps the raw code
function CodeBlock({ value }: { value: CodeBlockValue }) {
  const { language, code } = value

  return (
    <pre data-language={language}>
      <code>{code}</code>
    </pre>
  )
}
|
|
107
|
+
|
|
108
|
+
// URL schemes an editor-supplied link may use. Anything else (e.g. `javascript:`,
// `data:`) is not rendered as a clickable link.
const SAFE_LINK_SCHEMES = ["http", "https", "mailto", "tel"]

/**
 * Returns true when `href` is a relative URL or uses one of SAFE_LINK_SCHEMES.
 * Empty or missing values are treated as unsafe.
 */
function isSafeHref(href: string | undefined): href is string {
  if (!href) return false

  // Browsers ignore ASCII whitespace and control characters while parsing the
  // scheme (so "java\tscript:" still runs); strip them before inspecting it.
  const compact = href.replace(/[\u0000-\u0020\u007f]/g, "")
  const scheme = /^([a-z][a-z0-9+.-]*):/i.exec(compact)

  // No scheme means a relative, root-relative, fragment or protocol-relative URL.
  if (!scheme) return true

  return SAFE_LINK_SCHEMES.includes(scheme[1].toLowerCase())
}

// External link mark — opens in a new tab when `blank` is true.
// The href comes from editable content, so it is checked against an allow-list
// of schemes; when it is missing or unsafe the text is rendered without a link.
function LinkMark({
  value,
  children,
}: {
  value?: LinkMarkValue
  children: React.ReactNode
}) {
  const href = value?.href
  if (!isSafeHref(href)) {
    return <>{children}</>
  }

  const target = value?.blank ? "_blank" : undefined
  // rel guards the opener when the link opens in a new tab
  const rel = value?.blank ? "noopener noreferrer" : undefined

  return (
    <a href={href} target={target} rel={rel}>
      {children}
    </a>
  )
}
|
|
125
|
+
|
|
126
|
+
// Internal link mark — resolves the slug to a root-relative local route.
// Falls back to "#" when no slug is available.
function InternalLinkMark({
  value,
  children,
}: {
  value?: InternalLinkMarkValue
  children: React.ReactNode
}) {
  const slug = value?.slug?.current

  // Drop leading slashes/backslashes: a slug such as "/about" would otherwise
  // produce "//about", which browsers resolve as a protocol-relative URL
  // pointing at a different host instead of a local path.
  const href = slug ? `/${slug.replace(/^[\\/]+/, "")}` : "#"

  return <a href={href}>{children}</a>
}
|
|
139
|
+
|
|
140
|
+
// === Component Map ===
|
|
141
|
+
|
|
142
|
+
const components: PortableTextComponents = {
  // Block-level custom types, keyed by `_type`
  types: { image: ImageBlock, code: CodeBlock },
  // Annotation marks, keyed by the mark's `_type`
  marks: { link: LinkMark, internalLink: InternalLinkMark },
}
|
|
152
|
+
|
|
153
|
+
// === Main Component ===
|
|
154
|
+
|
|
155
|
+
/**
 * Renders Portable Text content inside a wrapper <div> using the component map
 * defined in this file. Renders nothing when `value` is falsy.
 *
 * @param value - Portable Text array to render
 * @param className - optional class name applied to the wrapper <div>
 */
export function PortableTextBody({ value, className }: PortableTextBodyProps) {
  return value ? (
    <div className={className}>
      <PortableText value={value} components={components} />
    </div>
  ) : null
}
|
package/config/features.ts
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
* })
|
|
16
16
|
*
|
|
17
17
|
* @see packages/eval/src/pipeline/compiler/presets/sanity-literacy.ts
|
|
18
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
|
|
18
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-3-gap-analysis.md
|
|
19
19
|
*/
|
|
20
20
|
|
|
21
21
|
import { defineFeatures } from "@sanity/ailf-core"
|
package/config/models.ts
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* models.ts — Central model registry for AILF evaluations.
|
|
3
3
|
*
|
|
4
|
-
* Define all models to test here. Each
|
|
5
|
-
*
|
|
4
|
+
* Define all models to test here. Each model declares which evaluation
|
|
5
|
+
* modes it participates in (e.g., "literacy", "mcp-server") and
|
|
6
|
+
* optionally which variants within those modes.
|
|
6
7
|
*
|
|
7
|
-
*
|
|
8
|
+
* When a model enrolls in a mode without specifying variants, all
|
|
9
|
+
* variants defined by the mode base are included by default.
|
|
10
|
+
*
|
|
11
|
+
* @see docs/archive/exec-plans/architecture-overhaul/phase-1-ts-config-loading.md
|
|
8
12
|
*/
|
|
9
13
|
|
|
10
14
|
import { defineModels } from "@sanity/ailf-core"
|
|
@@ -16,13 +20,9 @@ export default defineModels({
|
|
|
16
20
|
id: "anthropic:messages:claude-opus-4-6",
|
|
17
21
|
label: "Claude Opus 4.6",
|
|
18
22
|
config: { temperature: 0.2, max_tokens: 4096 },
|
|
19
|
-
modes: [
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
"agentic-naive",
|
|
23
|
-
"agentic-optimized",
|
|
24
|
-
"mcp-server",
|
|
25
|
-
],
|
|
23
|
+
modes: ["literacy", "mcp-server", "knowledge-probe"],
|
|
24
|
+
// All literacy variants included by default (baseline, observed,
|
|
25
|
+
// agentic-naive, agentic-optimized)
|
|
26
26
|
},
|
|
27
27
|
|
|
28
28
|
// ── Google ─────────────────────────────────────────────────
|
|
@@ -30,7 +30,7 @@ export default defineModels({
|
|
|
30
30
|
// id: "google:gemini-2.5-pro",
|
|
31
31
|
// label: "Gemini 2.5 Pro",
|
|
32
32
|
// config: { temperature: 0.2, max_tokens: 4096 },
|
|
33
|
-
// modes: ["
|
|
33
|
+
// modes: ["literacy"],
|
|
34
34
|
// },
|
|
35
35
|
|
|
36
36
|
// ── OpenAI ─────────────────────────────────────────────────
|
|
@@ -38,7 +38,8 @@ export default defineModels({
|
|
|
38
38
|
id: "openai:chat:gpt-5.2",
|
|
39
39
|
label: "GPT 5.2",
|
|
40
40
|
config: { temperature: 0.2, max_tokens: 4096 },
|
|
41
|
-
modes: ["
|
|
41
|
+
modes: ["literacy", "knowledge-probe"],
|
|
42
|
+
// All literacy variants included by default
|
|
42
43
|
},
|
|
43
44
|
{
|
|
44
45
|
id: "openai:responses:gpt-5.4",
|
|
@@ -48,25 +49,28 @@ export default defineModels({
|
|
|
48
49
|
max_output_tokens: 4096,
|
|
49
50
|
maxRetries: 1,
|
|
50
51
|
},
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
"agentic-naive",
|
|
55
|
-
"agentic-optimized",
|
|
56
|
-
"mcp-server",
|
|
57
|
-
],
|
|
52
|
+
timeoutMs: 600_000, // 10 min — reasoning model needs more headroom
|
|
53
|
+
modes: ["literacy", "mcp-server", "knowledge-probe"],
|
|
54
|
+
// All literacy variants included by default
|
|
58
55
|
},
|
|
59
56
|
|
|
60
57
|
// ── Disabled models (uncomment to enable) ──────────────────
|
|
61
58
|
// { id: "anthropic:claude-sonnet-4-20250514", label: "Claude Sonnet 4",
|
|
62
|
-
// config: { temperature: 0.2, max_tokens: 4096 },
|
|
59
|
+
// config: { temperature: 0.2, max_tokens: 4096 },
|
|
60
|
+
// modes: ["literacy"],
|
|
61
|
+
// variants: { literacy: ["baseline"] } },
|
|
63
62
|
// { id: "anthropic:claude-3.5-sonnet-20241022", label: "Claude 3.5 Sonnet",
|
|
64
63
|
// config: { temperature: 0.2, max_tokens: 4096 },
|
|
65
|
-
// modes: ["
|
|
64
|
+
// modes: ["literacy"],
|
|
65
|
+
// variants: { literacy: ["baseline", "agentic-naive", "agentic-optimized"] } },
|
|
66
66
|
// { id: "google:gemini-2.0-flash", label: "Gemini 2.0 Flash",
|
|
67
|
-
// config: { temperature: 0.2, max_tokens: 4096 },
|
|
67
|
+
// config: { temperature: 0.2, max_tokens: 4096 },
|
|
68
|
+
// modes: ["literacy"],
|
|
69
|
+
// variants: { literacy: ["baseline"] } },
|
|
68
70
|
// { id: "openrouter:deepseek/deepseek-r1", label: "DeepSeek R1",
|
|
69
|
-
// config: { temperature: 0.2, max_tokens: 4096 },
|
|
71
|
+
// config: { temperature: 0.2, max_tokens: 4096 },
|
|
72
|
+
// modes: ["literacy"],
|
|
73
|
+
// variants: { literacy: ["baseline"] } },
|
|
70
74
|
],
|
|
71
75
|
|
|
72
76
|
// ── Grading Model ──────────────────────────────────────────
|
|
@@ -77,6 +81,7 @@ export default defineModels({
|
|
|
77
81
|
},
|
|
78
82
|
|
|
79
83
|
// ── Evaluation Options ─────────────────────────────────────
|
|
84
|
+
evalBudgetMs: 1_200_000, // 20 min per eval mode — outer kill switch
|
|
80
85
|
maxConcurrency: 32, // max parallel API calls — benchmarked in DOC-1896
|
|
81
86
|
|
|
82
87
|
// ── Default Config ─────────────────────────────────────────
|
package/config/sources.ts
CHANGED
package/config/thresholds.ts
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* - `npx @sanity/ailf pipeline --publish` (severity-aware sink routing)
|
|
7
7
|
* - `npx @sanity/ailf pipeline --compare` (regression alerting)
|
|
8
8
|
*
|
|
9
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
9
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
12
|
import { defineThresholds } from "@sanity/ailf-core"
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* comparison-formatters.test.ts
|
|
3
|
+
*
|
|
4
|
+
* Verifies that formatComparisonMarkdown() and formatComparisonTable()
|
|
5
|
+
* dynamically derive column headers from the dimension keys present
|
|
6
|
+
* in the report data, rather than hardcoding literacy-specific names.
|
|
7
|
+
*
|
|
8
|
+
* Run: npx tsx --test src/__tests__/comparison-formatters.test.ts
|
|
9
|
+
*/
|
|
10
|
+
export {};
|