@sanity/ailf 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -1
- package/canonical/grader-references/README.md +2 -2
- package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
- package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
- package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
- package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
- package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
- package/config/features.ts +1 -1
- package/config/models.ts +29 -12
- package/config/sources.ts +1 -1
- package/config/thresholds.ts +1 -1
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
- package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
- package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
- package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
- package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
- package/dist/_vendor/ailf-core/config-helpers.js +51 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
- package/dist/_vendor/ailf-core/examples/index.js +213 -94
- package/dist/_vendor/ailf-core/index.d.ts +3 -2
- package/dist/_vendor/ailf-core/index.js +2 -1
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
- package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
- package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
- package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
- package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/services/index.js +1 -1
- package/dist/_vendor/ailf-core/services/scoring.js +9 -0
- package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
- package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
- package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
- package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
- package/dist/adapters/api-client/remediation.js +2 -2
- package/dist/adapters/config-sources/file-config-adapter.js +7 -1
- package/dist/adapters/config-sources/ts-config-loader.js +21 -13
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
- package/dist/adapters/index.d.ts +0 -1
- package/dist/adapters/index.js +0 -1
- package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
- package/dist/adapters/task-sources/composite-task-source.js +1 -1
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
- package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
- package/dist/adapters/task-sources/index.d.ts +3 -4
- package/dist/adapters/task-sources/index.js +3 -4
- package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
- package/dist/adapters/task-sources/repo-schemas.js +228 -20
- package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
- package/dist/adapters/task-sources/repo-task-source.js +81 -122
- package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
- package/dist/adapters/task-sources/repo-trigger.js +1 -1
- package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
- package/dist/adapters/task-sources/repo-validation.js +126 -5
- package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
- package/dist/adapters/task-sources/task-file-loader.js +21 -7
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/artifact-capture/comparator.d.ts +22 -0
- package/dist/artifact-capture/comparator.js +493 -0
- package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
- package/dist/artifact-capture/filesystem-collector.js +237 -0
- package/dist/artifact-capture/redact-artifact.d.ts +20 -0
- package/dist/artifact-capture/redact-artifact.js +115 -0
- package/dist/assertions/source-isolation.d.ts +1 -1
- package/dist/assertions/source-isolation.js +1 -1
- package/dist/cli.js +4 -0
- package/dist/commands/calculate-scores.js +1 -0
- package/dist/commands/capture-compare.d.ts +15 -0
- package/dist/commands/capture-compare.js +253 -0
- package/dist/commands/capture-list.d.ts +12 -0
- package/dist/commands/capture-list.js +147 -0
- package/dist/commands/capture.d.ts +9 -0
- package/dist/commands/capture.js +16 -0
- package/dist/commands/chronic-failures.d.ts +8 -0
- package/dist/commands/chronic-failures.js +33 -0
- package/dist/commands/coverage-audit.js +3 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +37 -8
- package/dist/commands/fetch-docs.js +1 -0
- package/dist/commands/generate-configs.d.ts +3 -3
- package/dist/commands/generate-configs.js +20 -8
- package/dist/commands/init.d.ts +5 -4
- package/dist/commands/init.js +190 -25
- package/dist/commands/pipeline-action.d.ts +7 -1
- package/dist/commands/pipeline-action.js +43 -19
- package/dist/commands/pipeline.d.ts +6 -1
- package/dist/commands/pipeline.js +7 -2
- package/dist/commands/pr-comment.js +1 -0
- package/dist/commands/publish.js +1 -0
- package/dist/commands/shared/help.js +2 -2
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +2 -2
- package/dist/commands/validate-tasks.js +26 -15
- package/dist/composition-root.d.ts +15 -4
- package/dist/composition-root.js +100 -55
- package/dist/config/features.ts +23 -0
- package/dist/config/models.ts +100 -0
- package/dist/config/prompts.ts +16 -0
- package/dist/config/rubrics.ts +225 -0
- package/dist/config/schedules.ts +47 -0
- package/dist/config/sinks.ts +37 -0
- package/dist/config/sources.ts +21 -0
- package/dist/config/thresholds.ts +61 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.js +48 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.js +13 -0
- package/dist/orchestration/build-step-sequence.js +4 -2
- package/dist/orchestration/cache-context.d.ts +23 -0
- package/dist/orchestration/cache-context.js +43 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
- package/dist/orchestration/load-pipeline-tasks.js +52 -0
- package/dist/orchestration/pipeline-orchestrator.js +75 -5
- package/dist/orchestration/step-runner.js +5 -1
- package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
- package/dist/orchestration/steps/calculate-scores-step.js +13 -0
- package/dist/orchestration/steps/callback-step.js +10 -1
- package/dist/orchestration/steps/compare-step.js +6 -3
- package/dist/orchestration/steps/discovery-report-step.js +6 -2
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
- package/dist/orchestration/steps/fetch-docs-step.js +32 -19
- package/dist/orchestration/steps/gap-analysis-step.js +13 -2
- package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
- package/dist/orchestration/steps/generate-configs-step.js +77 -26
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
- package/dist/orchestration/steps/publish-report-step.js +19 -0
- package/dist/orchestration/steps/readiness-step.js +8 -3
- package/dist/orchestration/steps/report-step.js +17 -4
- package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
- package/dist/orchestration/steps/run-eval-step.js +51 -31
- package/dist/pipeline/agent-behavior-report.js +6 -0
- package/dist/pipeline/attribution.d.ts +1 -1
- package/dist/pipeline/attribution.js +1 -1
- package/dist/pipeline/cache.js +29 -15
- package/dist/pipeline/calculate-scores.d.ts +2 -0
- package/dist/pipeline/calculate-scores.js +70 -33
- package/dist/pipeline/chronic-failures.d.ts +55 -0
- package/dist/pipeline/chronic-failures.js +110 -0
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
- package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
- package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
- package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
- package/dist/pipeline/compiler/assertion-mapper.js +1 -1
- package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
- package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
- package/dist/pipeline/compiler/config-loader.d.ts +14 -0
- package/dist/pipeline/compiler/config-loader.js +42 -2
- package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/fixture-resolver.js +1 -1
- package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
- package/dist/pipeline/compiler/ignore-fields.js +1 -1
- package/dist/pipeline/compiler/index.d.ts +2 -5
- package/dist/pipeline/compiler/index.js +2 -5
- package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
- package/dist/pipeline/compiler/literacy-bridge.js +2 -2
- package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
- package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
- package/dist/pipeline/compiler/mode-bases/index.js +4 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
- package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
- package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
- package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
- package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
- package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
- package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
- package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
- package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
- package/dist/pipeline/compiler/preset-loader.js +99 -0
- package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
- package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
- package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
- package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
- package/dist/pipeline/compiler/provider-assembler.js +13 -7
- package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
- package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/index.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
- package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
- package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
- package/dist/pipeline/compiler/scoring-bridge.js +1 -1
- package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
- package/dist/pipeline/compiler/task-bridge.js +92 -0
- package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
- package/dist/pipeline/compiler/task-graph-builder.js +1 -4
- package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
- package/dist/pipeline/compiler/telemetry/index.js +1 -1
- package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
- package/dist/pipeline/compiler/variable-resolver.js +1 -1
- package/dist/pipeline/coverage-audit.d.ts +1 -1
- package/dist/pipeline/coverage-audit.js +1 -1
- package/dist/pipeline/degradations.d.ts +1 -1
- package/dist/pipeline/degradations.js +1 -1
- package/dist/pipeline/expand-tasks.d.ts +2 -2
- package/dist/pipeline/expand-tasks.js +2 -2
- package/dist/pipeline/failure-modes.d.ts +1 -1
- package/dist/pipeline/failure-modes.js +13 -1
- package/dist/pipeline/gap-analysis.d.ts +1 -1
- package/dist/pipeline/gap-analysis.js +3 -1
- package/dist/pipeline/generate-configs.d.ts +2 -2
- package/dist/pipeline/generate-configs.js +16 -9
- package/dist/pipeline/grader-compare-runner.d.ts +1 -1
- package/dist/pipeline/grader-compare-runner.js +7 -1
- package/dist/pipeline/grader-comparison.d.ts +1 -1
- package/dist/pipeline/grader-comparison.js +1 -1
- package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
- package/dist/pipeline/grader-consistency-runner.js +7 -1
- package/dist/pipeline/grader-consistency.d.ts +1 -1
- package/dist/pipeline/grader-consistency.js +1 -1
- package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity-runner.js +1 -1
- package/dist/pipeline/grader-sensitivity.d.ts +1 -1
- package/dist/pipeline/grader-sensitivity.js +1 -1
- package/dist/pipeline/grader-validate-runner.d.ts +1 -1
- package/dist/pipeline/grader-validate-runner.js +2 -2
- package/dist/pipeline/grader-validation.d.ts +1 -1
- package/dist/pipeline/grader-validation.js +1 -1
- package/dist/pipeline/map-request-to-config.js +16 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
- package/dist/pipeline/mirror-repo-tasks.js +10 -10
- package/dist/pipeline/plan-format.d.ts +1 -1
- package/dist/pipeline/plan-format.js +1 -1
- package/dist/pipeline/plan.d.ts +1 -1
- package/dist/pipeline/plan.js +68 -30
- package/dist/pipeline/probe.d.ts +1 -1
- package/dist/pipeline/probe.js +1 -1
- package/dist/pipeline/readiness-report.d.ts +2 -2
- package/dist/pipeline/readiness-report.js +2 -2
- package/dist/pipeline/release-classification.d.ts +1 -1
- package/dist/pipeline/release-classification.js +1 -1
- package/dist/pipeline/release-report.d.ts +1 -1
- package/dist/pipeline/release-report.js +1 -1
- package/dist/pipeline/repo-eval-comment.d.ts +1 -1
- package/dist/pipeline/repo-eval-comment.js +1 -1
- package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
- package/dist/pipeline/repo-threshold-evaluator.js +1 -1
- package/dist/pipeline/resolve-mappings.d.ts +6 -6
- package/dist/pipeline/resolve-mappings.js +44 -44
- package/dist/pipeline/retrieval-metrics.d.ts +3 -3
- package/dist/pipeline/retrieval-metrics.js +28 -20
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +1 -1
- package/dist/pipeline/targeted-loo.js +1 -1
- package/dist/pipeline/thresholds.d.ts +1 -1
- package/dist/pipeline/thresholds.js +1 -1
- package/dist/pipeline/validate.js +13 -0
- package/dist/report-store.d.ts +17 -0
- package/dist/report-store.js +24 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-task-mode.d.ts +1 -1
- package/dist/scripts/migrate-task-mode.js +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
- package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +1 -1
- package/dist/scripts/validate-task-sources.js +1 -1
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +1 -1
- package/dist/sinks/types.js +1 -1
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
- package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
- package/dist/tasks/literacy/content-lake.task.ts +181 -0
- package/dist/tasks/literacy/frameworks.task.ts +129 -0
- package/dist/tasks/literacy/functions.task.ts +70 -0
- package/dist/tasks/literacy/groq.task.ts +259 -0
- package/dist/tasks/literacy/image-handling.task.ts +95 -0
- package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
- package/dist/tasks/literacy/portable-text.task.ts +169 -0
- package/dist/tasks/literacy/studio-setup.task.ts +134 -0
- package/dist/tasks/literacy/visual-editing.task.ts +147 -0
- package/package.json +32 -24
- package/tasks/.expanded.agentic.yaml +280 -0
- package/tasks/.expanded.yaml +565 -0
- package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
- package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
- package/tasks/literacy/content-lake.task.ts +181 -0
- package/tasks/literacy/frameworks.task.ts +1 -0
- package/tasks/literacy/functions.task.ts +1 -0
- package/tasks/literacy/groq.task.ts +1 -0
- package/tasks/literacy/image-handling.task.ts +95 -0
- package/tasks/literacy/nextjs-live.task.ts +2 -1
- package/tasks/literacy/portable-text.task.ts +169 -0
- package/tasks/literacy/studio-setup.task.ts +5 -2
- package/tasks/literacy/visual-editing.task.ts +1 -0
- package/LICENSE +0 -21
- package/tasks/frameworks.yaml +0 -98
- package/tasks/functions.yaml +0 -51
- package/tasks/groq.yaml +0 -216
- package/tasks/nextjs-live.yaml +0 -62
- package/tasks/studio-setup.yaml +0 -111
- package/tasks/visual-editing.yaml +0 -120
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sinks/slack-sink.ts
|
|
3
|
+
*
|
|
4
|
+
* Slack notification sink — posts formatted score change messages to
|
|
5
|
+
* configured Slack channels via incoming webhooks.
|
|
6
|
+
*
|
|
7
|
+
* By default, only posts when regressions are detected (avoids notification
|
|
8
|
+
* fatigue). Set `alwaysPost: true` to receive all reports.
|
|
9
|
+
*
|
|
10
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
11
|
+
* @see docs/design-docs/report-store/notifications.md
|
|
12
|
+
*/
|
|
13
|
+
import type { Report, SinkHealthStatus, SinkResult } from "../pipeline/types.js";
|
|
14
|
+
import type { ReportSink } from "./types.js";
|
|
15
|
+
/**
 * Optional behaviour switches accepted by the Slack sink.
 */
export interface SlackSinkOptions {
    /**
     * When `true`, every report is posted. When omitted or `false`, only
     * reports that contain regressions are posted.
     */
    alwaysPost?: boolean;
}
|
|
19
|
+
/**
 * Report sink that delivers reports to Slack through an incoming webhook.
 *
 * Unless `alwaysPost` is set in the options, only reports containing
 * regressions are posted.
 */
export declare class SlackSink implements ReportSink {
    /** Identifier of this sink. */
    readonly name = "slack";
    private readonly webhookUrl;
    private readonly channel?;
    private readonly options;
    /**
     * @param webhookUrl - Webhook endpoint that messages are posted to.
     * @param channel - Optional channel to set on the outgoing message.
     * @param options - Optional behaviour switches (see {@link SlackSinkOptions}).
     */
    constructor(
        webhookUrl: string,
        channel?: string | undefined,
        options?: SlackSinkOptions
    );
    /** Resolves with the health status of the configured webhook URL. */
    healthCheck(): Promise<SinkHealthStatus>;
    /** Posts (or skips) the given report and resolves with the outcome. */
    publish(report: Report): Promise<SinkResult>;
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sinks/slack-sink.ts
|
|
3
|
+
*
|
|
4
|
+
* Slack notification sink — posts formatted score change messages to
|
|
5
|
+
* configured Slack channels via incoming webhooks.
|
|
6
|
+
*
|
|
7
|
+
* By default, only posts when regressions are detected (avoids notification
|
|
8
|
+
* fatigue). Set `alwaysPost: true` to receive all reports.
|
|
9
|
+
*
|
|
10
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
11
|
+
* @see docs/design-docs/report-store/notifications.md
|
|
12
|
+
*/
|
|
13
|
+
import { formatRegressionAlert, formatScoreSummary, } from "./format-slack.js";
|
|
14
|
+
export class SlackSink {
|
|
15
|
+
webhookUrl;
|
|
16
|
+
channel;
|
|
17
|
+
options;
|
|
18
|
+
name = "slack";
|
|
19
|
+
constructor(webhookUrl, channel, options = {}) {
|
|
20
|
+
this.webhookUrl = webhookUrl;
|
|
21
|
+
this.channel = channel;
|
|
22
|
+
this.options = options;
|
|
23
|
+
}
|
|
24
|
+
healthCheck() {
|
|
25
|
+
try {
|
|
26
|
+
const url = new URL(this.webhookUrl);
|
|
27
|
+
if (url.protocol !== "https:") {
|
|
28
|
+
return Promise.resolve({
|
|
29
|
+
healthy: false,
|
|
30
|
+
reason: `Webhook URL must use HTTPS, got ${url.protocol}`,
|
|
31
|
+
});
|
|
32
|
+
}
|
|
33
|
+
if (!this.webhookUrl.startsWith("https://hooks.slack.com/")) {
|
|
34
|
+
// Non-standard URL — warn but don't fail (could be a proxy)
|
|
35
|
+
return Promise.resolve({ healthy: true });
|
|
36
|
+
}
|
|
37
|
+
return Promise.resolve({ healthy: true });
|
|
38
|
+
}
|
|
39
|
+
catch {
|
|
40
|
+
return Promise.resolve({
|
|
41
|
+
healthy: false,
|
|
42
|
+
reason: `Invalid webhook URL: ${this.webhookUrl}`,
|
|
43
|
+
});
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
async publish(report) {
|
|
47
|
+
const hasRegressions = report.comparison !== undefined && report.comparison.regressed.length > 0;
|
|
48
|
+
let message;
|
|
49
|
+
if (hasRegressions) {
|
|
50
|
+
message = formatRegressionAlert(report);
|
|
51
|
+
}
|
|
52
|
+
else if (this.options.alwaysPost) {
|
|
53
|
+
message = formatScoreSummary(report);
|
|
54
|
+
}
|
|
55
|
+
else {
|
|
56
|
+
return { reason: "No regressions detected", status: "skipped" };
|
|
57
|
+
}
|
|
58
|
+
const body = { ...message };
|
|
59
|
+
if (this.channel) {
|
|
60
|
+
body.channel = this.channel;
|
|
61
|
+
}
|
|
62
|
+
const response = await fetch(this.webhookUrl, {
|
|
63
|
+
body: JSON.stringify(body),
|
|
64
|
+
headers: { "Content-Type": "application/json" },
|
|
65
|
+
method: "POST",
|
|
66
|
+
});
|
|
67
|
+
if (!response.ok) {
|
|
68
|
+
return {
|
|
69
|
+
error: `Slack webhook returned HTTP ${response.status}`,
|
|
70
|
+
status: "failed",
|
|
71
|
+
};
|
|
72
|
+
}
|
|
73
|
+
return {
|
|
74
|
+
detail: hasRegressions ? "regression alert" : "score summary",
|
|
75
|
+
status: "success",
|
|
76
|
+
};
|
|
77
|
+
}
|
|
78
|
+
}
|
package/dist/sinks/types.d.ts
CHANGED
|
@@ -54,6 +54,6 @@ export interface ReportSink {
|
|
|
54
54
|
* - The payload's maxSeverity matches an enabled routing rule
|
|
55
55
|
* - The payload has regressions and the sink's `regression` rule is enabled
|
|
56
56
|
*
|
|
57
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
57
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
58
58
|
*/
|
|
59
59
|
export declare function shouldDeliver(routing: SinkRouting | undefined, payload: SinkPayload): boolean;
|
package/dist/sinks/types.js
CHANGED
|
@@ -21,7 +21,7 @@
|
|
|
21
21
|
* - The payload's maxSeverity matches an enabled routing rule
|
|
22
22
|
* - The payload has regressions and the sink's `regression` rule is enabled
|
|
23
23
|
*
|
|
24
|
-
* @see docs/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
24
|
+
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
25
25
|
*/
|
|
26
26
|
export function shouldDeliver(routing, payload) {
|
|
27
27
|
// No routing config = deliver everything (backward compatible)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sinks/webhook-sink.ts
|
|
3
|
+
*
|
|
4
|
+
* Generic HTTP webhook sink — POSTs the full report JSON to any endpoint.
|
|
5
|
+
* This is the universal adapter for integrations that don't have a
|
|
6
|
+
* dedicated sink (Airbyte, Zapier, custom services, etc.).
|
|
7
|
+
*
|
|
8
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
9
|
+
*/
|
|
10
|
+
import type { Report, SinkHealthStatus, SinkResult } from "../pipeline/types.js";
|
|
11
|
+
import type { ReportSink } from "./types.js";
|
|
12
|
+
/** Report sink that POSTs the report as JSON to an arbitrary HTTP endpoint. */
export declare class WebhookSink implements ReportSink {
|
|
13
|
+
/** Endpoint the report is POSTed to. */
private readonly url;
|
|
14
|
+
/** Extra request headers; spread after the default Content-Type so they can override it. */
private readonly headers;
|
|
15
|
+
/** `webhook:<hostname>` derived from the URL, or "webhook:invalid-url" when it cannot be parsed. */
readonly name: string;
|
|
16
|
+
/**
 * @param url - Endpoint to POST reports to.
 * @param headers - Optional extra request headers (defaults to none).
 */
constructor(url: string, headers?: Record<string, string>);
|
|
17
|
+
/** Checks only that the configured URL parses; no network request is made. */
healthCheck(): Promise<SinkHealthStatus>;
|
|
18
|
+
/**
 * POSTs `JSON.stringify(report)` to the endpoint. A non-OK response resolves
 * with status "failed"; otherwise the result is "success" with the HTTP status.
 */
publish(report: Report): Promise<SinkResult>;
|
|
19
|
+
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sinks/webhook-sink.ts
|
|
3
|
+
*
|
|
4
|
+
* Generic HTTP webhook sink — POSTs the full report JSON to any endpoint.
|
|
5
|
+
* This is the universal adapter for integrations that don't have a
|
|
6
|
+
* dedicated sink (Airbyte, Zapier, custom services, etc.).
|
|
7
|
+
*
|
|
8
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
9
|
+
*/
|
|
10
|
+
export class WebhookSink {
|
|
11
|
+
url;
|
|
12
|
+
headers;
|
|
13
|
+
name;
|
|
14
|
+
constructor(url, headers = {}) {
|
|
15
|
+
this.url = url;
|
|
16
|
+
this.headers = headers;
|
|
17
|
+
try {
|
|
18
|
+
this.name = `webhook:${new URL(url).hostname}`;
|
|
19
|
+
}
|
|
20
|
+
catch {
|
|
21
|
+
this.name = "webhook:invalid-url";
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
healthCheck() {
|
|
25
|
+
try {
|
|
26
|
+
new URL(this.url);
|
|
27
|
+
return Promise.resolve({ healthy: true });
|
|
28
|
+
}
|
|
29
|
+
catch {
|
|
30
|
+
return Promise.resolve({
|
|
31
|
+
healthy: false,
|
|
32
|
+
reason: `Invalid URL: ${this.url}`,
|
|
33
|
+
});
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
async publish(report) {
|
|
37
|
+
const response = await fetch(this.url, {
|
|
38
|
+
body: JSON.stringify(report),
|
|
39
|
+
headers: { "Content-Type": "application/json", ...this.headers },
|
|
40
|
+
method: "POST",
|
|
41
|
+
});
|
|
42
|
+
if (!response.ok) {
|
|
43
|
+
return {
|
|
44
|
+
error: `HTTP ${response.status}: ${response.statusText}`,
|
|
45
|
+
status: "failed",
|
|
46
|
+
};
|
|
47
|
+
}
|
|
48
|
+
return { detail: `HTTP ${response.status}`, status: "success" };
|
|
49
|
+
}
|
|
50
|
+
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge probe: Sanity defineType API.
|
|
3
|
+
*
|
|
4
|
+
* Tests whether models know the current typed schema API
|
|
5
|
+
* vs the legacy untyped format.
|
|
6
|
+
*
|
|
7
|
+
* Knowledge probes test the model's built-in knowledge WITHOUT providing documentation
|
|
8
|
+
* context (no `context.docs`). Contrast with "literacy" tasks which inject docs.
|
|
9
|
+
*/
|
|
10
|
+
import { defineTask } from "../../_vendor/ailf-core/index.js"
|
|
11
|
+
|
|
12
|
+
export default defineTask({
  // Knowledge-probe mode: no documentation is injected, so the score reflects
  // what the model already knows.
  mode: "knowledge-probe",
  id: "kp-define-type-api",
  title: "What is Sanity's defineType API?",
  description:
    "Explain how to define document schemas in Sanity using the defineType, defineField, and defineArrayMember helper functions.",
  // Area drives score aggregation in reports and the --area CLI filter.
  area: "studio",
  // Reporting metadata only; evaluation behavior does not depend on it.
  difficulty: "basic",
  // Free-form labels matched by the --tag CLI filter.
  tags: ["knowledge-probe", "studio", "schema"],
  // "breadth-first" covers many topics; "depth-first" drills into one.
  probeStrategy: "breadth-first",
  prompt: {
    // Knowledge probes send this text directly to the model (literacy tasks
    // use vars.task with a template instead).
    text: [
      "Explain Sanity's schema definition API:",
      "",
      "1. What is `defineType` and how do you use it?",
      "2. What are `defineField` and `defineArrayMember`?",
      "3. Why were these typed helpers introduced? What did they replace?",
      "4. Show a complete example of a document schema with various field types",
      "5. How do you add validation rules using the typed API?",
    ].join("\n"),
    vars: {
      task: "Explain Sanity's defineType/defineField schema API with examples, motivation, and validation rules.",
    },
  },
  assertions: [
    { type: "contains", value: "defineType" },
    { type: "contains", value: "defineField" },
    // Inline llm-rubric: `value` is the rubric text itself (literacy tasks use
    // template + criteria instead).
    {
      type: "llm-rubric",
      value:
        "The response should accurately explain the typed schema helpers. Check that the code examples use the current API, not the legacy untyped format. Penalize if the response uses the old pattern without mentioning defineType.",
      // Relative weight in the overall score; the two rubrics are weighted
      // equally at 0.5 each.
      weight: 0.5,
    },
    {
      type: "llm-rubric",
      value:
        "Evaluate currency: does the response know about defineType (introduced in Sanity v3)? Does it mention TypeScript type inference benefits?",
      weight: 0.5,
    },
  ],
})
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Knowledge probe: GROQ projection syntax.
|
|
3
|
+
*
|
|
4
|
+
* Tests deep knowledge of Sanity's query language without
|
|
5
|
+
* providing any documentation context.
|
|
6
|
+
*
|
|
7
|
+
* See define-type-api.task.ts for detailed explanations of knowledge-probe properties.
|
|
8
|
+
*/
|
|
9
|
+
import { defineTask } from "../../_vendor/ailf-core/index.js"
|
|
10
|
+
|
|
11
|
+
export default defineTask({
  mode: "knowledge-probe",
  id: "kp-groq-projections",
  title: "Explain GROQ Projection Syntax",
  description:
    "Explain GROQ's projection syntax in detail, including: object projections ({}), array projections, spread operator (...), computed field names, the dereference operator (->), and conditional projections using select().",
  area: "groq",
  difficulty: "intermediate",
  tags: ["knowledge-probe", "groq", "syntax"],
  // This probe goes deep on a single topic ("depth-first") rather than
  // touching many topics shallowly ("breadth-first").
  probeStrategy: "depth-first",
  prompt: {
    text: [
      "Explain GROQ's projection syntax in Sanity. Cover these topics:",
      "",
      "1. Basic object projections with `{}`",
      "2. Nested projections and the spread operator `...`",
      "3. Computed field names",
      "4. The dereference operator `->` for following references",
      "5. Array slicing with `[0..5]` and `[0...5]`",
      "6. Conditional projections using `select()`",
      "",
      "Provide working code examples for each.",
    ].join("\n"),
    vars: {
      task: "Explain GROQ projection syntax with working code examples covering projections, spread, dereference, slicing, and select().",
    },
  },
  assertions: [
    // Cheap string checks: the answer must at least show `->` and `select(`.
    { type: "contains", value: "->" },
    { type: "contains", value: "select(" },
    // Accuracy rubric (weight 0.6).
    {
      type: "llm-rubric",
      value:
        "The response should demonstrate accurate knowledge of GROQ projection syntax with working code examples. Check that the dereference operator, spread syntax, and select() are correctly explained with valid GROQ code.",
      weight: 0.6,
    },
    // Currency rubric (weight 0.4).
    {
      type: "llm-rubric",
      value:
        "Evaluate whether the response reflects current GROQ syntax (post-2023). Check for deprecated patterns or outdated recommendations.",
      weight: 0.4,
    },
  ],
})
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import { defineTask } from "../../_vendor/ailf-core/index.js"
|
|
2
|
+
|
|
3
|
+
export default [
  // Literacy task: CRUD and transactions through @sanity/client.
  defineTask({
    id: "content-lake-mutations",
    mode: "literacy",
    title: "Content Lake - CRUD operations with @sanity/client",
    description: "Content Lake - CRUD operations with @sanity/client",
    // Area drives score aggregation in reports and the --area CLI filter.
    area: "content-lake",
    // Adds an auto-generated rubric that checks whether the response uses the provided docs.
    docCoverage: true,
    context: {
      // Sanity doc pages (resolved by slug via the content lake) that are
      // fetched and injected into the prompt as context.
      docs: [
        { slug: "js-client-mutations", reason: "Creating and updating documents with @sanity/client" },
        { slug: "js-client-transactions", reason: "Atomic transactions — multiple mutations in one request" },
        { slug: "js-client-deleting", reason: "Deleting documents with @sanity/client" },
        { slug: "http-patches", reason: "Patch operations — set, unset, inc, insert, diffMatchPatch" },
      ],
    },
    // Gold-standard implementation the grader compares against; the path is
    // relative to the eval package root.
    referenceSolution: "reference-solutions/content-lake/mutations.ts",
    prompt: {
      vars: {
        // Instruction shown to the model under evaluation (interpolated into the prompt template).
        task: `Implement a content management service using @sanity/client that
performs CRUD operations on Sanity documents.

Requirements:
1. Create a new document with typed fields
2. Patch an existing document (set fields, unset fields, increment a counter)
3. Delete a document by ID
4. Use a transaction to atomically create multiple documents
5. Include proper TypeScript types

Provide a complete, reusable implementation.`,
        // file:// URI resolved at runtime to a generated canonical context
        // file (produced by `npx @sanity/ailf fetch-docs`).
        docs: "file://contexts/canonical/content-lake-mutations.md",
      },
    },
    // Each assertion yields a pass/fail that contributes to the task score.
    assertions: [
      // Template-based llm-rubric: the grader model judges the response against the criteria.
      {
        type: "llm-rubric",
        template: "task-completion",
        criteria: [
          "Document creation with client.create() or client.createOrReplace()",
          "Patch operations with set/unset/inc",
          "Document deletion with client.delete()",
          "Transaction with multiple mutations",
        ],
      },
      {
        type: "llm-rubric",
        template: "code-correctness",
        criteria: [
          "Correct @sanity/client API usage",
          "Proper patch chaining syntax",
          "Transaction commit pattern",
          "TypeScript types for document shapes",
        ],
      },
      // Deterministic string matching: fast, cheap, and needs no grader model.
      { type: "contains", value: "createClient", weight: 1 },
      // contains-any passes as soon as one of the listed strings is present.
      { type: "contains-any", value: [".create(", ".createOrReplace(", ".createIfNotExists("], weight: 1 },
      { type: "contains-any", value: [".patch(", ".set("], weight: 1 },
      { type: "contains-any", value: [".transaction(", "transaction"], weight: 1 },
      { type: "contains-any", value: [".delete(", "client.delete"], weight: 1 },
    ],
  }),

  // Literacy task: real-time listeners through @sanity/client.
  defineTask({
    id: "content-lake-realtime",
    mode: "literacy",
    title: "Content Lake - Real-time listeners with @sanity/client",
    description: "Content Lake - Real-time listeners with @sanity/client",
    area: "content-lake",
    docCoverage: true,
    context: {
      docs: [
        { slug: "js-client-realtime", reason: "Listening to content updates with @sanity/client" },
        { slug: "realtime-updates", reason: "Real-time updates — listener protocol, event types" },
        { slug: "js-client-getting-started", reason: "Client setup — configuration, tokens, API version" },
        { slug: "js-client-querying", reason: "Querying content — needed for initial data fetch before listening" },
      ],
    },
    referenceSolution: "reference-solutions/content-lake/realtime.ts",
    prompt: {
      vars: {
        task: `Implement real-time content synchronization using Sanity's listener API.

Requirements:
1. Set up a listener that watches for changes to documents of a specific type
2. Handle mutation events (create, update, delete)
3. Apply changes to a local in-memory cache
4. Handle reconnection and error scenarios
5. Provide a way to unsubscribe/clean up the listener

Provide a complete implementation with TypeScript types.`,
        docs: "file://contexts/canonical/content-lake-realtime.md",
      },
    },
    assertions: [
      {
        type: "llm-rubric",
        template: "task-completion",
        criteria: [
          "Listener setup with GROQ filter for document type",
          "Mutation event handling (create, update, delete differentiation)",
          "Local cache synchronization",
          "Cleanup/unsubscribe mechanism",
          "Error or reconnection handling",
        ],
      },
      {
        type: "llm-rubric",
        template: "code-correctness",
        criteria: [
          "Correct client.listen() API usage",
          "Proper GROQ filter syntax in listener",
          "Subscription cleanup pattern (unsubscribe)",
          "No deprecated listener APIs",
        ],
      },
      { type: "contains-any", value: [".listen(", "client.listen"], weight: 1 },
      { type: "contains-any", value: ["subscribe", ".subscribe("], weight: 1 },
    ],
  }),
]
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { defineTask } from "../../_vendor/ailf-core/index.js"
|
|
2
|
+
|
|
3
|
+
// See content-lake.task.ts for detailed explanations of common task properties
|
|
4
|
+
export default [
  // Literacy task: Sanity in a Remix app (loader + route component).
  defineTask({
    id: "remix-integration",
    mode: "literacy",
    title: "Frameworks - Remix integration with data fetching",
    description: "Frameworks - Remix integration with data fetching",
    area: "frameworks",
    docCoverage: true,
    context: {
      // Doc pages injected into the prompt as context.
      docs: [
        {
          slug: "displaying-content-in-a-react-router-front-end",
          reason: "React Router front-end content display guide",
        },
        {
          slug: "visual-editing-with-react-router",
          reason: "React Router / Remix integration with visual editing",
        },
        {
          slug: "functions-js-client",
          reason: "Configuring @sanity/client for data fetching",
        },
      ],
    },
    referenceSolution: "reference-solutions/frameworks/remix.tsx",
    prompt: {
      vars: {
        task: `Integrate Sanity into a Remix application:

1. Set up the Sanity client
2. Create a loader that fetches blog posts using GROQ
3. Build a route component that renders the fetched posts
4. Handle loading and error states properly

Provide all necessary files for a working Remix + Sanity integration.`,
        docs: "file://contexts/canonical/remix-integration.md",
      },
    },
    assertions: [
      {
        type: "llm-rubric",
        template: "task-completion",
        criteria: [
          "Sanity client configuration",
          "Remix loader function with GROQ query",
          "Route component using useLoaderData",
          "Proper typing",
        ],
      },
      {
        type: "llm-rubric",
        template: "code-correctness",
        criteria: [
          "Modern Remix patterns (v2 conventions)",
          "Proper loader/component separation",
          "Valid GROQ queries",
          "No deprecated APIs",
        ],
      },
      { type: "contains-any", value: ["useLoaderData", "loader"], weight: 1 },
    ],
  }),

  // Literacy task: Sanity in a Nuxt 4 app via the @nuxtjs/sanity module.
  defineTask({
    id: "nuxt-integration",
    mode: "literacy",
    title: "Frameworks - Nuxt 4 integration",
    description: "Frameworks - Nuxt 4 integration",
    area: "frameworks",
    docCoverage: true,
    context: {
      docs: [
        {
          slug: "displaying-content-in-nuxt-js",
          reason: "Nuxt.js front-end content display guide",
        },
        {
          slug: "visual-editing-with-nuxt",
          reason: "Nuxt visual editing integration",
        },
      ],
    },
    referenceSolution: "reference-solutions/frameworks/nuxt.ts",
    prompt: {
      vars: {
        task: `Integrate Sanity into a Nuxt 4 application:

1. Install and configure the @nuxtjs/sanity module
2. Create a page that fetches and displays blog posts
3. Use Nuxt composables for data fetching

Provide all necessary configuration and component code.`,
        docs: "file://contexts/canonical/nuxt-integration.md",
      },
    },
    assertions: [
      {
        type: "llm-rubric",
        template: "task-completion",
        criteria: [
          "@nuxtjs/sanity module setup in nuxt.config.ts",
          "Page component using Nuxt data fetching composables",
          "Sanity GROQ query",
        ],
      },
      {
        type: "llm-rubric",
        template: "code-correctness",
        // The prompt asks for Nuxt 4, so the rubric names Nuxt 3/4 rather than
        // Nuxt 3 alone; the intent is still "modern Nuxt, not Nuxt 2".
        criteria: [
          "Nuxt 3/4 module configuration syntax",
          "Uses useSanityQuery or equivalent composable",
          "Proper Nuxt 3/4 patterns (not Nuxt 2)",
        ],
      },
      {
        type: "contains-any",
        value: ["@nuxtjs/sanity", "useSanityQuery", "sanity:"],
        weight: 1,
      },
    ],
  }),
]
|