npm - @sanity/ailf - Versions diffs - 2.0.0 → 2.0.1 - Mend

@sanity/ailf 2.0.0 → 2.0.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (442)

package/canonical/grader-references/README.md +2 -2
package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
package/config/features.ts +1 -1
package/config/models.ts +28 -23
package/config/sources.ts +1 -1
package/config/thresholds.ts +1 -1
package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
package/dist/_vendor/ailf-core/config-helpers.js +29 -0
package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
package/dist/_vendor/ailf-core/examples/index.js +208 -114
package/dist/_vendor/ailf-core/index.d.ts +1 -0
package/dist/_vendor/ailf-core/index.js +1 -0
package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
package/dist/_vendor/ailf-core/services/index.js +1 -1
package/dist/_vendor/ailf-core/services/scoring.js +9 -0
package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
package/dist/_vendor/ailf-tasks/cli.js +61 -0
package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
package/dist/_vendor/ailf-tasks/index.js +16 -0
package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
package/dist/_vendor/ailf-tasks/parser.js +73 -0
package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
package/dist/_vendor/ailf-tasks/schemas.js +180 -0
package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
package/dist/_vendor/ailf-tasks/validation.js +162 -0
package/dist/adapters/api-client/remediation.js +2 -2
package/dist/adapters/config-sources/file-config-adapter.js +6 -1
package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
package/dist/adapters/index.d.ts +0 -1
package/dist/adapters/index.js +0 -1
package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
package/dist/adapters/task-sources/composite-task-source.js +1 -1
package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
package/dist/adapters/task-sources/index.d.ts +1 -2
package/dist/adapters/task-sources/index.js +1 -2
package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
package/dist/adapters/task-sources/repo-schemas.js +2 -2
package/dist/adapters/task-sources/repo-task-source.js +1 -1
package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
package/dist/adapters/task-sources/repo-trigger.js +1 -1
package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
package/dist/adapters/task-sources/task-file-loader.js +20 -6
package/dist/agent-observer/test-imports.d.ts +7 -0
package/dist/agent-observer/test-imports.js +185 -0
package/dist/artifact-capture/comparator.d.ts +22 -0
package/dist/artifact-capture/comparator.js +493 -0
package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
package/dist/artifact-capture/filesystem-collector.js +237 -0
package/dist/artifact-capture/redact-artifact.d.ts +20 -0
package/dist/artifact-capture/redact-artifact.js +115 -0
package/dist/assertions/source-isolation.d.ts +1 -1
package/dist/assertions/source-isolation.js +1 -1
package/dist/cli.js +4 -0
package/dist/commands/calculate-scores.js +1 -0
package/dist/commands/capture-compare.d.ts +15 -0
package/dist/commands/capture-compare.js +253 -0
package/dist/commands/capture-list.d.ts +12 -0
package/dist/commands/capture-list.js +147 -0
package/dist/commands/capture.d.ts +9 -0
package/dist/commands/capture.js +16 -0
package/dist/commands/chronic-failures.d.ts +8 -0
package/dist/commands/chronic-failures.js +33 -0
package/dist/commands/explain-handler.d.ts +1 -1
package/dist/commands/explain-handler.js +37 -8
package/dist/commands/fetch-docs.js +1 -0
package/dist/commands/generate-configs.d.ts +3 -3
package/dist/commands/generate-configs.js +20 -8
package/dist/commands/init.d.ts +2 -3
package/dist/commands/init.js +56 -170
package/dist/commands/pipeline-action.d.ts +7 -1
package/dist/commands/pipeline-action.js +43 -19
package/dist/commands/pipeline.d.ts +6 -1
package/dist/commands/pipeline.js +7 -2
package/dist/commands/pr-comment.js +1 -0
package/dist/commands/publish.js +1 -0
package/dist/commands/shared/help.js +2 -2
package/dist/commands/update-quality-scores.d.ts +5 -0
package/dist/commands/update-quality-scores.js +20 -0
package/dist/composition-root.d.ts +2 -3
package/dist/composition-root.js +27 -14
package/dist/config/features.ts +23 -0
package/dist/config/models.ts +100 -0
package/dist/config/prompts.ts +16 -0
package/dist/config/rubrics.ts +225 -0
package/dist/config/schedules.ts +47 -0
package/dist/config/sinks.ts +37 -0
package/dist/config/sources.ts +21 -0
package/dist/config/thresholds.ts +61 -0
package/dist/lib/agent-behavior-report.d.ts +8 -0
package/dist/lib/agent-behavior-report.js +185 -0
package/dist/lib/baseline.d.ts +19 -0
package/dist/lib/baseline.js +153 -0
package/dist/lib/calculate-scores.d.ts +23 -0
package/dist/lib/calculate-scores.js +42 -0
package/dist/lib/compare.d.ts +18 -0
package/dist/lib/compare.js +170 -0
package/dist/lib/coverage-audit.d.ts +4 -0
package/dist/lib/coverage-audit.js +42 -0
package/dist/lib/discovery-report.d.ts +13 -0
package/dist/lib/discovery-report.js +57 -0
package/dist/lib/fetch-docs.d.ts +30 -0
package/dist/lib/fetch-docs.js +171 -0
package/dist/lib/generate-configs.d.ts +25 -0
package/dist/lib/generate-configs.js +42 -0
package/dist/lib/grader-api.d.ts +21 -0
package/dist/lib/grader-api.js +34 -0
package/dist/lib/grader-compare.d.ts +19 -0
package/dist/lib/grader-compare.js +91 -0
package/dist/lib/grader-consistency.d.ts +27 -0
package/dist/lib/grader-consistency.js +79 -0
package/dist/lib/grader-sensitivity.d.ts +19 -0
package/dist/lib/grader-sensitivity.js +75 -0
package/dist/lib/grader-validate.d.ts +19 -0
package/dist/lib/grader-validate.js +78 -0
package/dist/lib/measure-retrieval.d.ts +14 -0
package/dist/lib/measure-retrieval.js +71 -0
package/dist/lib/pr-comment.d.ts +16 -0
package/dist/lib/pr-comment.js +28 -0
package/dist/lib/readiness-report.d.ts +13 -0
package/dist/lib/readiness-report.js +108 -0
package/dist/lib/webhook-server.d.ts +11 -0
package/dist/lib/webhook-server.js +24 -0
package/dist/lib/weekly-digest.d.ts +24 -0
package/dist/lib/weekly-digest.js +148 -0
package/dist/orchestration/build-app-context.js +13 -0
package/dist/orchestration/cache-context.d.ts +23 -0
package/dist/orchestration/cache-context.js +43 -0
package/dist/orchestration/env-bridge.d.ts +21 -0
package/dist/orchestration/env-bridge.js +66 -0
package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
package/dist/orchestration/load-pipeline-tasks.js +52 -0
package/dist/orchestration/pipeline-orchestrator.js +75 -5
package/dist/orchestration/step-runner.js +5 -1
package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
package/dist/orchestration/steps/calculate-scores-step.js +13 -0
package/dist/orchestration/steps/callback-step.js +10 -1
package/dist/orchestration/steps/compare-step.js +6 -3
package/dist/orchestration/steps/discovery-report-step.js +6 -2
package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
package/dist/orchestration/steps/fetch-docs-step.js +30 -16
package/dist/orchestration/steps/gap-analysis-step.js +13 -2
package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
package/dist/orchestration/steps/generate-configs-step.js +50 -15
package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
package/dist/orchestration/steps/publish-report-step.js +19 -0
package/dist/orchestration/steps/readiness-step.js +8 -3
package/dist/orchestration/steps/report-step.js +17 -4
package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
package/dist/orchestration/steps/run-eval-step.js +51 -31
package/dist/pipeline/agent-behavior-report.js +6 -0
package/dist/pipeline/attribution.d.ts +1 -1
package/dist/pipeline/attribution.js +1 -1
package/dist/pipeline/cache.js +29 -15
package/dist/pipeline/calculate-scores.d.ts +2 -0
package/dist/pipeline/calculate-scores.js +70 -33
package/dist/pipeline/chronic-failures.d.ts +55 -0
package/dist/pipeline/chronic-failures.js +110 -0
package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
package/dist/pipeline/compiler/assertion-mapper.js +1 -1
package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
package/dist/pipeline/compiler/config-loader.d.ts +14 -0
package/dist/pipeline/compiler/config-loader.js +42 -2
package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
package/dist/pipeline/compiler/fixture-resolver.js +1 -1
package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
package/dist/pipeline/compiler/ignore-fields.js +1 -1
package/dist/pipeline/compiler/index.d.ts +2 -5
package/dist/pipeline/compiler/index.js +2 -5
package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
package/dist/pipeline/compiler/literacy-bridge.js +1 -1
package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
package/dist/pipeline/compiler/provider-assembler.js +13 -7
package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/index.js +1 -1
package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
package/dist/pipeline/compiler/scoring-bridge.js +1 -1
package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
package/dist/pipeline/compiler/task-bridge.js +92 -0
package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
package/dist/pipeline/compiler/task-graph-builder.js +1 -4
package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
package/dist/pipeline/compiler/telemetry/index.js +1 -1
package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
package/dist/pipeline/compiler/variable-resolver.js +1 -1
package/dist/pipeline/coverage-audit.d.ts +1 -1
package/dist/pipeline/coverage-audit.js +1 -1
package/dist/pipeline/degradations.d.ts +1 -1
package/dist/pipeline/degradations.js +1 -1
package/dist/pipeline/failure-modes.d.ts +1 -1
package/dist/pipeline/failure-modes.js +13 -1
package/dist/pipeline/gap-analysis.d.ts +1 -1
package/dist/pipeline/gap-analysis.js +3 -1
package/dist/pipeline/generate-configs.d.ts +2 -2
package/dist/pipeline/generate-configs.js +15 -8
package/dist/pipeline/grader-compare-runner.d.ts +1 -1
package/dist/pipeline/grader-compare-runner.js +7 -1
package/dist/pipeline/grader-comparison.d.ts +1 -1
package/dist/pipeline/grader-comparison.js +1 -1
package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
package/dist/pipeline/grader-consistency-runner.js +7 -1
package/dist/pipeline/grader-consistency.d.ts +1 -1
package/dist/pipeline/grader-consistency.js +1 -1
package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
package/dist/pipeline/grader-sensitivity-runner.js +1 -1
package/dist/pipeline/grader-sensitivity.d.ts +1 -1
package/dist/pipeline/grader-sensitivity.js +1 -1
package/dist/pipeline/grader-validate-runner.d.ts +1 -1
package/dist/pipeline/grader-validate-runner.js +2 -2
package/dist/pipeline/grader-validation.d.ts +1 -1
package/dist/pipeline/grader-validation.js +1 -1
package/dist/pipeline/map-request-to-config.js +15 -2
package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
package/dist/pipeline/mirror-repo-tasks.js +1 -1
package/dist/pipeline/plan-format.d.ts +1 -1
package/dist/pipeline/plan-format.js +1 -1
package/dist/pipeline/plan.d.ts +1 -1
package/dist/pipeline/plan.js +67 -29
package/dist/pipeline/probe.d.ts +1 -1
package/dist/pipeline/probe.js +1 -1
package/dist/pipeline/readiness-report.d.ts +2 -2
package/dist/pipeline/readiness-report.js +2 -2
package/dist/pipeline/release-classification.d.ts +1 -1
package/dist/pipeline/release-classification.js +1 -1
package/dist/pipeline/release-report.d.ts +1 -1
package/dist/pipeline/release-report.js +1 -1
package/dist/pipeline/repo-eval-comment.d.ts +1 -1
package/dist/pipeline/repo-eval-comment.js +1 -1
package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
package/dist/pipeline/repo-threshold-evaluator.js +1 -1
package/dist/pipeline/resolve-mappings.d.ts +6 -6
package/dist/pipeline/resolve-mappings.js +44 -44
package/dist/pipeline/retrieval-metrics.d.ts +3 -3
package/dist/pipeline/retrieval-metrics.js +28 -20
package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
package/dist/pipeline/steps/calculate-scores-step.js +89 -0
package/dist/pipeline/steps/compare-step.d.ts +18 -0
package/dist/pipeline/steps/compare-step.js +90 -0
package/dist/pipeline/steps/eval-step.d.ts +53 -0
package/dist/pipeline/steps/eval-step.js +347 -0
package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
package/dist/pipeline/steps/fetch-docs-step.js +84 -0
package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
package/dist/pipeline/steps/generate-configs-step.js +98 -0
package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
package/dist/pipeline/steps/grader-consistency-step.js +74 -0
package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
package/dist/pipeline/steps/publish-report-step.js +243 -0
package/dist/pipeline/steps/report-step.d.ts +13 -0
package/dist/pipeline/steps/report-step.js +56 -0
package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
package/dist/pipeline/steps/update-scores-step.js +42 -0
package/dist/pipeline/targeted-loo.d.ts +1 -1
package/dist/pipeline/targeted-loo.js +1 -1
package/dist/pipeline/thresholds.d.ts +1 -1
package/dist/pipeline/thresholds.js +1 -1
package/dist/pipeline/validate.js +13 -0
package/dist/report-store.d.ts +17 -0
package/dist/report-store.js +24 -0
package/dist/scripts/agent-behavior-report.d.ts +19 -0
package/dist/scripts/agent-behavior-report.js +315 -0
package/dist/scripts/baseline.d.ts +43 -0
package/dist/scripts/baseline.js +267 -0
package/dist/scripts/calculate-scores.d.ts +166 -0
package/dist/scripts/calculate-scores.js +1296 -0
package/dist/scripts/compare.d.ts +22 -0
package/dist/scripts/compare.js +334 -0
package/dist/scripts/coverage-audit.d.ts +44 -0
package/dist/scripts/coverage-audit.js +209 -0
package/dist/scripts/debug-eval.d.ts +19 -0
package/dist/scripts/debug-eval.js +73 -0
package/dist/scripts/discovery-report.d.ts +58 -0
package/dist/scripts/discovery-report.js +250 -0
package/dist/scripts/fetch-docs.d.ts +35 -0
package/dist/scripts/fetch-docs.js +472 -0
package/dist/scripts/generate-configs.d.ts +66 -0
package/dist/scripts/generate-configs.js +459 -0
package/dist/scripts/grader-api.d.ts +27 -0
package/dist/scripts/grader-api.js +206 -0
package/dist/scripts/grader-compare.d.ts +22 -0
package/dist/scripts/grader-compare.js +368 -0
package/dist/scripts/grader-consistency.d.ts +20 -0
package/dist/scripts/grader-consistency.js +313 -0
package/dist/scripts/grader-sensitivity.d.ts +22 -0
package/dist/scripts/grader-sensitivity.js +354 -0
package/dist/scripts/grader-validate.d.ts +19 -0
package/dist/scripts/grader-validate.js +267 -0
package/dist/scripts/measure-retrieval.d.ts +10 -0
package/dist/scripts/measure-retrieval.js +145 -0
package/dist/scripts/migrate-task-mode.d.ts +1 -1
package/dist/scripts/migrate-task-mode.js +1 -1
package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
package/dist/scripts/pipeline.d.ts +76 -0
package/dist/scripts/pipeline.js +1031 -0
package/dist/scripts/pr-comment.d.ts +10 -0
package/dist/scripts/pr-comment.js +510 -0
package/dist/scripts/readiness-report.d.ts +88 -0
package/dist/scripts/readiness-report.js +342 -0
package/dist/scripts/update-quality-scores.d.ts +15 -0
package/dist/scripts/update-quality-scores.js +184 -0
package/dist/scripts/validate-task-sources.d.ts +1 -1
package/dist/scripts/validate-task-sources.js +1 -1
package/dist/scripts/validate.d.ts +13 -0
package/dist/scripts/validate.js +79 -0
package/dist/scripts/webhook-server.d.ts +26 -0
package/dist/scripts/webhook-server.js +147 -0
package/dist/scripts/weekly-digest.d.ts +24 -0
package/dist/scripts/weekly-digest.js +144 -0
package/dist/sinks/format-slack.d.ts +64 -0
package/dist/sinks/format-slack.js +306 -0
package/dist/sinks/slack-sink.d.ts +27 -0
package/dist/sinks/slack-sink.js +78 -0
package/dist/sinks/types.d.ts +1 -1
package/dist/sinks/types.js +1 -1
package/dist/sinks/webhook-sink.d.ts +19 -0
package/dist/sinks/webhook-sink.js +50 -0
package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
package/dist/tasks/literacy/content-lake.task.ts +181 -0
package/dist/tasks/literacy/frameworks.task.ts +129 -0
package/dist/tasks/literacy/functions.task.ts +70 -0
package/dist/tasks/literacy/groq.task.ts +259 -0
package/dist/tasks/literacy/image-handling.task.ts +95 -0
package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
package/dist/tasks/literacy/portable-text.task.ts +169 -0
package/dist/tasks/literacy/studio-setup.task.ts +134 -0
package/dist/tasks/literacy/visual-editing.task.ts +147 -0
package/package.json +24 -24
package/tasks/.expanded.agentic.yaml +280 -0
package/tasks/.expanded.yaml +565 -0
package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
package/tasks/literacy/content-lake.task.ts +181 -0
package/tasks/literacy/frameworks.task.ts +1 -0
package/tasks/literacy/functions.task.ts +1 -0
package/tasks/literacy/groq.task.ts +1 -0
package/tasks/literacy/image-handling.task.ts +95 -0
package/tasks/literacy/nextjs-live.task.ts +2 -1
package/tasks/literacy/portable-text.task.ts +169 -0
package/tasks/literacy/studio-setup.task.ts +5 -2
package/tasks/literacy/visual-editing.task.ts +1 -0
package/LICENSE +0 -21
package/tasks/frameworks.yaml +0 -98
package/tasks/functions.yaml +0 -51
package/tasks/groq.yaml +0 -216
package/tasks/nextjs-live.yaml +0 -62
package/tasks/studio-setup.yaml +0 -111
package/tasks/visual-editing.yaml +0 -120

package/dist/tasks/literacy/studio-setup.task.ts ADDED Viewed

@@ -0,0 +1,134 @@
+import { defineTask } from "../../_vendor/ailf-core/index.js"
+// See content-lake.task.ts for detailed explanations of common task properties
+export default [
+  defineTask({
+    id: "studio-blog-schema",
+    mode: "literacy",
+    title: "Studio Setup - Blog schema with posts, authors, categories",
+    description: "Studio Setup - Blog schema with posts, authors, categories",
+    area: "studio-setup",
+    docCoverage: true,
+    context: {
+      docs: [
+        {
+          slug: "schemas-and-forms",
+          reason: "High-level overview of schemas and the form builder",
+        },
+        {
+          slug: "introduction-to-schemas",
+          reason: "Introduces schema concepts, defineType/defineField",
+        },
+        {
+          slug: "config-api-reference",
+          reason:
+            "Configuration API — defineConfig, plugins, schema registration",
+        },
+        {
+          slug: "reference-type",
+          reason: "Reference field type for author/category relationships",
+        },
+      ],
+    },
+    referenceSolution: "reference-solutions/studio-setup/blog-schema.ts",
+    prompt: {
+      vars: {
+        task: `Set up a new Sanity Studio with a custom schema for a blog:
+1. Create document types for: posts, authors, categories
+2. Posts should have: title, slug, body (portable text), author reference, categories array
+3. Authors should have: name, bio, image
+4. Categories should have: title, description
+Include the schema definitions and sanity.config.ts setup.`,
+        docs: "file://contexts/canonical/studio-blog-schema.md",
+      },
+    },
+    assertions: [
+      {
+        type: "llm-rubric",
+        template: "task-completion",
+        criteria: [
+          "Three document types (post, author, category)",
+          "Post with all required fields including portable text body",
+          "Reference from post to author",
+          "Array of references from post to categories",
+          "sanity.config.ts with schema registration",
+        ],
+      },
+      {
+        type: "llm-rubric",
+        template: "code-correctness",
+        criteria: [
+          "Uses defineConfig, defineType, defineField (v3 syntax)",
+          "Does NOT use createSchema (deprecated v2)",
+          "Proper reference syntax with 'to' array",
+          "Correct portable text array definition",
+        ],
+      },
+      { type: "contains", value: "defineConfig", weight: 1 },
+      { type: "contains", value: "defineType", weight: 1 },
+      { type: "contains", value: "defineField", weight: 1 },
+      // not-contains: FAILS the assertion if the string appears — penalizes deprecated v2 schema API
+      { type: "not-contains", value: "createSchema", weight: 1 },
+      { type: "not-contains", value: "import Schema from", weight: 1 },
+    ],
+  }),
+  defineTask({
+    id: "studio-custom-tool",
+    mode: "literacy",
+    title: "Studio Setup - Custom tool in sidebar",
+    description: "Studio Setup - Custom tool in sidebar",
+    area: "studio-setup",
+    context: {
+      docs: [
+        {
+          slug: "studio-tools",
+          reason: "Overview of Studio tools system",
+        },
+        {
+          slug: "tool-api-reference",
+          reason: "Tool API — name, title, icon, component properties",
+        },
+        {
+          slug: "custom-studio-tool",
+          reason: "Step-by-step guide for creating a custom tool",
+        },
+      ],
+    },
+    referenceSolution: "reference-solutions/studio-setup/custom-tool.tsx",
+    prompt: {
+      vars: {
+        task: `Add a custom tool to the Sanity Studio sidebar that displays
+a dashboard. The tool should:
+1. Appear in the studio navigation with a custom icon
+2. Have a title and name
+3. Render a React component showing a "Dashboard" heading
+Provide the tool definition and sanity.config.ts registration.`,
+        docs: "file://contexts/canonical/studio-custom-tool.md",
+      },
+    },
+    assertions: [
+      {
+        type: "llm-rubric",
+        template: "task-completion",
+        criteria: [
+          "Tool object with name, title, icon, component",
+          "React component for the tool UI",
+          "Registration in sanity.config.ts tools array",
+        ],
+      },
+      { type: "contains", value: "tools", weight: 1 },
+      // javascript: custom JS assertion — `output` is the model's response string; must return boolean
+      {
+        type: "javascript",
+        value: `return output.includes('name:') &&
+        output.includes('component') &&
+        (output.includes('icon:') || output.includes('Icon'))`,
+      },
+    ],
+  }),
+]

package/dist/tasks/literacy/visual-editing.task.ts ADDED Viewed

@@ -0,0 +1,147 @@
+import { defineTask } from "../../_vendor/ailf-core/index.js"
+// See content-lake.task.ts for detailed explanations of common task properties
+export default [
+  defineTask({
+    id: "visual-editing-presentation",
+    mode: "literacy",
+    title: "Visual Editing - Presentation tool with click-to-edit",
+    description: "Visual Editing - Presentation tool with click-to-edit",
+    area: "visual-editing",
+    docCoverage: true,
+    context: {
+      docs: [
+        {
+          slug: "configuring-the-presentation-tool",
+          reason: "Core presentationTool configuration and setup",
+        },
+        {
+          slug: "introduction-to-visual-editing",
+          reason: "Visual Editing concepts — stega, overlays, data attributes",
+        },
+        {
+          slug: "visual-editing-with-next-js-app-router",
+          reason: "Next.js App Router-specific visual editing guide",
+        },
+        {
+          slug: "stega",
+          reason: "Stega encoding for click-to-edit data attributes",
+        },
+      ],
+    },
+    referenceSolution:
+      "reference-solutions/visual-editing/presentation-nextjs.tsx",
+    prompt: {
+      vars: {
+        task: `Set up the Presentation tool with a Next.js 14 (App Router) frontend
+and implement click-to-edit functionality:
+1. Configure the Presentation tool in sanity.config.ts
+2. Set up the Next.js app to work with Visual Editing
+3. Implement data attributes so clicking content in the preview
+   opens the corresponding field in Studio
+Provide all necessary code for both Studio and Next.js sides.`,
+        docs: "file://contexts/canonical/visual-editing-presentation.md",
+      },
+    },
+    assertions: [
+      {
+        type: "llm-rubric",
+        template: "task-completion",
+        criteria: [
+          "presentationTool configured in sanity.config.ts",
+          "previewUrl or equivalent configured",
+          "Data attributes for click-to-edit (createDataAttribute or stega)",
+          "Next.js App Router patterns used correctly",
+        ],
+      },
+      {
+        type: "llm-rubric",
+        template: "code-correctness",
+        criteria: [
+          "Uses @sanity/presentation (not deprecated packages)",
+          "Uses createDataAttribute or stega encoding correctly",
+          "Proper Next.js App Router patterns (not Pages Router)",
+          "No mixing of deprecated and current APIs",
+        ],
+      },
+      { type: "contains", value: "presentationTool", weight: 1 },
+      {
+        type: "contains-any",
+        value: [
+          "createDataAttribute",
+          "data-sanity",
+          "encodeDataAttribute",
+          "stega",
+        ],
+        weight: 1,
+      },
+      { type: "not-contains", value: "@sanity/preview-kit", weight: 1 },
+    ],
+  }),
+  defineTask({
+    id: "visual-editing-live-preview",
+    mode: "literacy",
+    title: "Visual Editing - Live preview with draft content",
+    description: "Visual Editing - Live preview with draft content",
+    area: "visual-editing",
+    context: {
+      docs: [
+        {
+          slug: "live-content-api",
+          reason: "Live Content API — defineLive, real-time subscriptions",
+        },
+        {
+          slug: "perspectives",
+          reason: "Draft vs published perspectives",
+        },
+        {
+          slug: "fetching-content-for-visual-editing",
+          reason: "Data fetching patterns for visual editing contexts",
+        },
+      ],
+    },
+    referenceSolution: "reference-solutions/visual-editing/live-preview.tsx",
+    prompt: {
+      vars: {
+        task: `Implement live preview in a Next.js app that shows draft content
+from Sanity in real-time as editors make changes in the Studio.
+Requirements:
+- Use the Live Content API approach
+- Handle draft vs published perspectives correctly
+- Show real-time updates without page refresh
+Provide a complete implementation.`,
+        docs: "file://contexts/canonical/visual-editing-live-preview.md",
+      },
+    },
+    assertions: [
+      {
+        type: "llm-rubric",
+        template: "task-completion",
+        criteria: [
+          "Live Content API usage (defineLive, useLiveQuery, or sanityFetch with live option)",
+          "Draft perspective configuration",
+          "Real-time subscription/update mechanism",
+        ],
+      },
+      {
+        type: "llm-rubric",
+        template: "code-correctness",
+        criteria: [
+          "Modern API usage (not deprecated preview packages)",
+          "Proper perspective handling",
+          "Correct subscription lifecycle management",
+        ],
+      },
+      {
+        type: "contains-any",
+        value: ["useLiveQuery", "defineLive", "live:", "perspective"],
+        weight: 1,
+      },
+    ],
+  }),
+]

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "2.0.0",
+  "version": "2.0.1",
   "private": false,
   "publishConfig": {
     "access": "restricted"
@@ -31,28 +31,6 @@
     "canonical",
     "tasks"
   ],
-  "dependencies": {
-    "@google-cloud/bigquery": "^8.1.1",
-    "@inquirer/prompts": "^8.3.0",
-    "@modelcontextprotocol/sdk": "^1.29.0",
-    "@portabletext/markdown": "^1.0.0",
-    "@sanity/client": "^7.3.0",
-    "commander": "^14.0.3",
-    "dotenv": "^16.4.7",
-    "dotenv-cli": "^11.0.0",
-    "jiti": "^2.6.1",
-    "js-yaml": "^4.1.0",
-    "promptfoo": "^0.120.24",
-    "zod": "^4.3.6"
-  },
-  "devDependencies": {
-    "@types/js-yaml": "^4.0.9",
-    "@types/node": "^22.13.1",
-    "tsx": "^4.19.2",
-    "typescript": "^5.7.3",
-    "@sanity/ailf-shared": "0.1.0",
-    "@sanity/ailf-core": "0.1.0"
-  },
   "scripts": {
     "build": "tsc && tsx scripts/bundle-workspace-deps.ts",
     "generate-configs": "tsx src/cli.ts generate-configs",
@@ -80,5 +58,27 @@
     "discovery-report": "tsx src/cli.ts discovery-report",
     "webhook-server": "tsx src/cli.ts webhook-server",
     "weekly-digest": "tsx src/cli.ts weekly-digest"
+  },
+  "dependencies": {
+    "@google-cloud/bigquery": "^8.1.1",
+    "@inquirer/prompts": "^8.3.0",
+    "@modelcontextprotocol/sdk": "^1.29.0",
+    "@portabletext/markdown": "^1.0.0",
+    "@sanity/client": "^7.3.0",
+    "commander": "^14.0.3",
+    "dotenv": "^16.4.7",
+    "dotenv-cli": "^11.0.0",
+    "jiti": "^2.6.1",
+    "js-yaml": "^4.1.0",
+    "promptfoo": "^0.120.24",
+    "zod": "^4.3.6"
+  },
+  "devDependencies": {
+    "@sanity/ailf-core": "workspace:*",
+    "@sanity/ailf-shared": "workspace:*",
+    "@types/js-yaml": "^4.0.9",
+    "@types/node": "^22.13.1",
+    "tsx": "^4.19.2",
+    "typescript": "^5.7.3"
   }
-}
+}

package/tasks/.expanded.agentic.yaml ADDED Viewed

@@ -0,0 +1,280 @@
+# .expanded.agentic.yaml
+#
+# AUTO-GENERATED by compiler pipeline — do not edit directly.
+# Gold entries only (no baseline) for agentic evaluation mode.
+# Run: npx @sanity/ailf generate-configs
+- description: GROQ - Blog queries with filtering and pagination (gold)
+  vars:
+    task: |-
+      Write GROQ queries for a Sanity blog application:
+      1. Fetch all published blog posts ordered by publishedAt descending,
+         with a projection that includes: _id, title, slug (from slug.current),
+         publishedAt, excerpt, and the author's name (resolved from a reference)
+      2. Add pagination to return only the first 10 results
+      3. Fetch a single post by its slug parameter, including the full body
+         content and resolved author and category references
+      4. Fetch posts published after a specific date
+      5. Fetch posts that belong to a specific category (where categories
+         is an array of references)
+      Use @sanity/client with client.fetch() for all queries. Include
+      TypeScript types for the query results.
+    docs: file://contexts/canonical/groq-blog-queries.md
+    __featureArea: groq
+  assert:
+    - type: llm-rubric
+      value: |-
+        Score task completion from 0 to 100:
+        - 0: Couldn't attempt — missing critical information
+        - 20: Attempted but fundamentally wrong approach
+        - 50: Partial implementation — major functional gaps
+        - 80: Mostly complete — minor issues or missing edge cases
+        - 100: Fully functional code — works as expected
+        Must demonstrate:
+        - GROQ filter with _type == "post"
+        - Projection with aliased slug field ("slug": slug.current)
+        - Reference resolution with -> for author
+        - Ordering with | order(publishedAt desc)
+        - Slice/pagination syntax [0...10] or [0..9]
+        - Parameterized query with $slug for single post fetch
+        - Date filtering with dateTime() or string comparison
+        - Category filtering using references or array contains
+        Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
+      provider: anthropic:messages:claude-opus-4-5-20251101
+      metadata:
+        dimension: task-completion
+        maxScore: 100
+    - type: llm-rubric
+      value: |-
+        Score code correctness from 0 to 100:
+        - 0: Broken code, syntax errors, or deprecated APIs
+        - 30: Works but uses anti-patterns or inefficient approaches
+        - 50: Works but not idiomatic
+        - 80: Follows most best practices
+        - 100: Follows all best practices, idiomatic implementation
+        Check for:
+        - Valid GROQ syntax (proper filter brackets, projection braces)
+        - Uses @sanity/client createClient + client.fetch()
+        - Correct parameter passing syntax ($param)
+        - Proper reference dereference with ->
+        - No deprecated patterns
+        Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
+      provider: anthropic:messages:claude-opus-4-5-20251101
+      metadata:
+        dimension: code-correctness
+        maxScore: 100
+    - type: contains-any
+      value:
+        - client.fetch
+        - createClient
+      weight: 1
+    - type: contains-any
+      value:
+        - order(publishedAt
+        - order(_createdAt
+        - '| order('
+      weight: 1
+    - type: contains-any
+      value:
+        - '[0...10]'
+        - '[0..9]'
+        - '[0...'
+      weight: 1
+    - type: llm-rubric
+      value: |-
+        Score documentation coverage from 0 to 100:
+        - 0: Had to hallucinate/guess most implementation details
+        - 30: Significant gaps — filled with assumptions
+        - 50: Some gaps — inferred from partial information
+        - 80: Minor gaps — almost everything was documented
+        - 100: Complete coverage — all necessary info was in docs
+        Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
+      provider: anthropic:messages:claude-opus-4-5-20251101
+      metadata:
+        dimension: doc-coverage
+        maxScore: 100
+- description: GROQ - Joins and reference resolution (gold)
+  vars:
+    task: |-
+      Write GROQ queries that demonstrate join patterns in Sanity:
+      1. Follow a single reference to resolve an author's full profile
+         from a post (post.author -> author document with name, bio, image)
+      2. Resolve an array of category references from a post
+         (post.categories[]-> with title and slug)
+      3. Write a reverse reference query: given an author's ID, find all
+         posts by that author using a subquery and the parent scope operator (^)
+      4. Create a nested join: for each author, include their 5 most recent
+         posts as a nested array
+      5. Use the references() function to find all documents that reference
+         a specific document ID
+      Use @sanity/client with client.fetch(). Include TypeScript types.
+    docs: file://contexts/canonical/groq-joins-references.md
+    __featureArea: groq
+  assert:
+    - type: llm-rubric
+      value: |-
+        Score task completion from 0 to 100:
+        - 0: Couldn't attempt — missing critical information
+        - 20: Attempted but fundamentally wrong approach
+        - 50: Partial implementation — major functional gaps
+        - 80: Mostly complete — minor issues or missing edge cases
+        - 100: Fully functional code — works as expected
+        Must demonstrate:
+        - Single reference follow with -> operator
+        - Array reference resolution with []->
+        - Reverse reference / subquery using *[references(^._id)]
+        - Nested join pattern with parent scope (^)
+        - The references() function
+        Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
+      provider: anthropic:messages:claude-opus-4-5-20251101
+      metadata:
+        dimension: task-completion
+        maxScore: 100
+    - type: llm-rubric
+      value: |-
+        Score code correctness from 0 to 100:
+        - 0: Broken code, syntax errors, or deprecated APIs
+        - 30: Works but uses anti-patterns or inefficient approaches
+        - 50: Works but not idiomatic
+        - 80: Follows most best practices
+        - 100: Follows all best practices, idiomatic implementation
+        Check for:
+        - Correct -> dereference syntax
+        - Valid []-> array dereference
+        - Proper use of ^ parent scope operator
+        - Valid references() function usage
+        - No made-up syntax
+        Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
+      provider: anthropic:messages:claude-opus-4-5-20251101
+      metadata:
+        dimension: code-correctness
+        maxScore: 100
+    - type: contains
+      value: '->'
+      weight: 1
+    - type: contains-any
+      value:
+        - references(
+        - references(^
+      weight: 1
+    - type: llm-rubric
+      value: |-
+        Score documentation coverage from 0 to 100:
+        - 0: Had to hallucinate/guess most implementation details
+        - 30: Significant gaps — filled with assumptions
+        - 50: Some gaps — inferred from partial information
+        - 80: Minor gaps — almost everything was documented
+        - 100: Complete coverage — all necessary info was in docs
+        Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
+      provider: anthropic:messages:claude-opus-4-5-20251101
+      metadata:
+        dimension: doc-coverage
+        maxScore: 100
+- description: GROQ - Advanced filtering and projections (gold)
+  vars:
+    task: |-
+      Write GROQ queries demonstrating advanced filtering and projection patterns:
+      1. Use select() for conditional projections — return different fields
+         based on the document's _type (e.g., posts get excerpt, events get
+         date and venue)
+      2. Use coalesce() for fallback values — e.g., use seoTitle if it
+         exists, otherwise fall back to title
+      3. Use the match operator for full-text search in titles
+      4. Use count() to count documents matching a filter and to count
+         items within an array field
+      5. Use defined() to filter for documents that have a specific field set
+      6. Filter items within an array using [condition] syntax
+      7. Order results by multiple fields (e.g., featured status first,
+         then by publishedAt)
+      Use @sanity/client with client.fetch(). Include TypeScript types.
+    docs: file://contexts/canonical/groq-advanced-filtering.md
+    __featureArea: groq
+  assert:
+    - type: llm-rubric
+      value: |-
+        Score task completion from 0 to 100:
+        - 0: Couldn't attempt — missing critical information
+        - 20: Attempted but fundamentally wrong approach
+        - 50: Partial implementation — major functional gaps
+        - 80: Mostly complete — minor issues or missing edge cases
+        - 100: Fully functional code — works as expected
+        Must demonstrate:
+        - select() for conditional projections
+        - coalesce() for fallback values
+        - match operator for text search
+        - count() function usage
+        - defined() function for existence checks
+        - Array filtering with [condition]
+        - Multi-field ordering
+        Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
+      provider: anthropic:messages:claude-opus-4-5-20251101
+      metadata:
+        dimension: task-completion
+        maxScore: 100
+    - type: llm-rubric
+      value: |-
+        Score code correctness from 0 to 100:
+        - 0: Broken code, syntax errors, or deprecated APIs
+        - 30: Works but uses anti-patterns or inefficient approaches
+        - 50: Works but not idiomatic
+        - 80: Follows most best practices
+        - 100: Follows all best practices, idiomatic implementation
+        Check for:
+        - Valid select() syntax with => arrow notation
+        - Correct coalesce() usage
+        - Proper match operator usage (on text fields)
+        - Valid count() and defined() function calls
+        - Correct array filter syntax
+        Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
+      provider: anthropic:messages:claude-opus-4-5-20251101
+      metadata:
+        dimension: code-correctness
+        maxScore: 100
+    - type: contains-any
+      value:
+        - select(
+        - coalesce(
+      weight: 1
+    - type: contains-any
+      value:
+        - count(
+        - defined(
+      weight: 1
+    - type: contains-any
+      value:
+        - match
+      weight: 1
+    - type: llm-rubric
+      value: |-
+        Score documentation coverage from 0 to 100:
+        - 0: Had to hallucinate/guess most implementation details
+        - 30: Significant gaps — filled with assumptions
+        - 50: Some gaps — inferred from partial information
+        - 80: Minor gaps — almost everything was documented
+        - 100: Complete coverage — all necessary info was in docs
+        Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
+      provider: anthropic:messages:claude-opus-4-5-20251101
+      metadata:
+        dimension: doc-coverage
+        maxScore: 100