npm - @sanity/ailf - Versions diffs - 0.1.0 - Mend

@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (530) hide show

package/README.md +89 -0
package/bin/ailf.js +64 -0
package/canonical/grader-references/README.md +88 -0
package/canonical/grader-references/groq.yaml +234 -0
package/canonical/grader-references/studio-setup.yaml +275 -0
package/canonical/reference-solutions/.gitkeep +1 -0
package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
package/canonical/reference-solutions/groq/joins-references.ts +300 -0
package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
package/config/bigquery/README.md +74 -0
package/config/bigquery/views/area_scores.sql +87 -0
package/config/bigquery/views/reports.sql +49 -0
package/config/features.yaml +116 -0
package/config/models.yaml +115 -0
package/config/prompts.yaml +75 -0
package/config/rubrics.yaml +62 -0
package/config/schedules.yaml +43 -0
package/config/sinks.yaml +54 -0
package/config/sources.yaml +51 -0
package/config/thresholds.yaml +49 -0
package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
package/dist/_vendor/ailf-core/examples/index.js +285 -0
package/dist/_vendor/ailf-core/index.d.ts +17 -0
package/dist/_vendor/ailf-core/index.js +17 -0
package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
package/dist/_vendor/ailf-core/ports/context.js +14 -0
package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
package/dist/_vendor/ailf-core/ports/index.js +7 -0
package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
package/dist/_vendor/ailf-core/ports/logger.js +11 -0
package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
package/dist/_vendor/ailf-core/schemas/index.js +16 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
package/dist/_vendor/ailf-core/services/index.js +12 -0
package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
package/dist/_vendor/ailf-core/services/scoring.js +222 -0
package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
package/dist/_vendor/ailf-core/types/index.js +21 -0
package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
package/dist/_vendor/ailf-shared/document-ref.js +1 -0
package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
package/dist/_vendor/ailf-shared/index.d.ts +16 -0
package/dist/_vendor/ailf-shared/index.js +16 -0
package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
package/dist/_vendor/ailf-shared/score-grades.js +23 -0
package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
package/dist/adapters/cache/content-lake-cache.js +59 -0
package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
package/dist/adapters/cache/filesystem-cache.js +54 -0
package/dist/adapters/cache/index.d.ts +2 -0
package/dist/adapters/cache/index.js +2 -0
package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
package/dist/adapters/config-sources/file-config-adapter.js +96 -0
package/dist/adapters/config-sources/index.d.ts +2 -0
package/dist/adapters/config-sources/index.js +2 -0
package/dist/adapters/doc-fetchers/index.d.ts +1 -0
package/dist/adapters/doc-fetchers/index.js +1 -0
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
package/dist/adapters/eval-runners/index.d.ts +1 -0
package/dist/adapters/eval-runners/index.js +1 -0
package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
package/dist/adapters/index.d.ts +12 -0
package/dist/adapters/index.js +12 -0
package/dist/adapters/loggers/console-logger.d.ts +22 -0
package/dist/adapters/loggers/console-logger.js +54 -0
package/dist/adapters/loggers/index.d.ts +9 -0
package/dist/adapters/loggers/index.js +9 -0
package/dist/adapters/loggers/json-logger.d.ts +18 -0
package/dist/adapters/loggers/json-logger.js +33 -0
package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
package/dist/adapters/loggers/quiet-logger.js +30 -0
package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
package/dist/adapters/task-sources/composite-task-source.js +59 -0
package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
package/dist/adapters/task-sources/index.d.ts +7 -0
package/dist/adapters/task-sources/index.js +7 -0
package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
package/dist/adapters/task-sources/repo-schemas.js +234 -0
package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
package/dist/adapters/task-sources/repo-task-source.js +104 -0
package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
package/dist/adapters/task-sources/repo-trigger.js +153 -0
package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
package/dist/adapters/task-sources/repo-validation.js +164 -0
package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
package/dist/adapters/task-sources/yaml-task-source.js +136 -0
package/dist/agent-observer/agentic-provider.d.ts +132 -0
package/dist/agent-observer/agentic-provider.js +983 -0
package/dist/agent-observer/classifier.d.ts +62 -0
package/dist/agent-observer/classifier.js +269 -0
package/dist/agent-observer/index.d.ts +7 -0
package/dist/agent-observer/index.js +4 -0
package/dist/agent-observer/pricing.d.ts +35 -0
package/dist/agent-observer/pricing.js +82 -0
package/dist/agent-observer/provider.d.ts +77 -0
package/dist/agent-observer/provider.js +151 -0
package/dist/agent-observer/proxy.d.ts +91 -0
package/dist/agent-observer/proxy.js +321 -0
package/dist/agent-observer/test-imports.d.ts +7 -0
package/dist/agent-observer/test-imports.js +185 -0
package/dist/agent-observer/types.d.ts +137 -0
package/dist/agent-observer/types.js +16 -0
package/dist/assertions/source-isolation.d.ts +72 -0
package/dist/assertions/source-isolation.js +117 -0
package/dist/cli.d.ts +24 -0
package/dist/cli.js +199 -0
package/dist/commands/agent-report.d.ts +5 -0
package/dist/commands/agent-report.js +69 -0
package/dist/commands/baseline.d.ts +9 -0
package/dist/commands/baseline.js +141 -0
package/dist/commands/cache.d.ts +13 -0
package/dist/commands/cache.js +135 -0
package/dist/commands/calculate-scores.d.ts +8 -0
package/dist/commands/calculate-scores.js +48 -0
package/dist/commands/compare.d.ts +8 -0
package/dist/commands/compare.js +120 -0
package/dist/commands/completion.d.ts +18 -0
package/dist/commands/completion.js +260 -0
package/dist/commands/coverage-audit.d.ts +7 -0
package/dist/commands/coverage-audit.js +40 -0
package/dist/commands/discovery-report.d.ts +10 -0
package/dist/commands/discovery-report.js +44 -0
package/dist/commands/eval.d.ts +9 -0
package/dist/commands/eval.js +35 -0
package/dist/commands/explain-handler.d.ts +34 -0
package/dist/commands/explain-handler.js +719 -0
package/dist/commands/fetch-docs.d.ts +8 -0
package/dist/commands/fetch-docs.js +128 -0
package/dist/commands/generate-configs.d.ts +8 -0
package/dist/commands/generate-configs.js +46 -0
package/dist/commands/grader/index.d.ts +11 -0
package/dist/commands/grader/index.js +118 -0
package/dist/commands/init.d.ts +19 -0
package/dist/commands/init.js +150 -0
package/dist/commands/interactive.d.ts +12 -0
package/dist/commands/interactive.js +238 -0
package/dist/commands/lookup-doc.d.ts +15 -0
package/dist/commands/lookup-doc.js +84 -0
package/dist/commands/measure-retrieval.d.ts +5 -0
package/dist/commands/measure-retrieval.js +65 -0
package/dist/commands/pipeline-action.d.ts +71 -0
package/dist/commands/pipeline-action.js +305 -0
package/dist/commands/pipeline.d.ts +62 -0
package/dist/commands/pipeline.js +53 -0
package/dist/commands/pr-comment.d.ts +8 -0
package/dist/commands/pr-comment.js +47 -0
package/dist/commands/publish.d.ts +26 -0
package/dist/commands/publish.js +253 -0
package/dist/commands/readiness-report.d.ts +10 -0
package/dist/commands/readiness-report.js +104 -0
package/dist/commands/shared/options.d.ts +29 -0
package/dist/commands/shared/options.js +57 -0
package/dist/commands/update-quality-scores.d.ts +5 -0
package/dist/commands/update-quality-scores.js +20 -0
package/dist/commands/validate-tasks.d.ts +16 -0
package/dist/commands/validate-tasks.js +93 -0
package/dist/commands/validate.d.ts +9 -0
package/dist/commands/validate.js +73 -0
package/dist/commands/webhook-server.d.ts +5 -0
package/dist/commands/webhook-server.js +30 -0
package/dist/commands/weekly-digest.d.ts +10 -0
package/dist/commands/weekly-digest.js +104 -0
package/dist/composition-root.d.ts +26 -0
package/dist/composition-root.js +107 -0
package/dist/interpolate.d.ts +26 -0
package/dist/interpolate.js +70 -0
package/dist/job-store.d.ts +104 -0
package/dist/job-store.js +188 -0
package/dist/lib/agent-behavior-report.d.ts +8 -0
package/dist/lib/agent-behavior-report.js +185 -0
package/dist/lib/baseline.d.ts +19 -0
package/dist/lib/baseline.js +153 -0
package/dist/lib/calculate-scores.d.ts +23 -0
package/dist/lib/calculate-scores.js +42 -0
package/dist/lib/compare.d.ts +18 -0
package/dist/lib/compare.js +170 -0
package/dist/lib/coverage-audit.d.ts +4 -0
package/dist/lib/coverage-audit.js +42 -0
package/dist/lib/discovery-report.d.ts +13 -0
package/dist/lib/discovery-report.js +57 -0
package/dist/lib/fetch-docs.d.ts +30 -0
package/dist/lib/fetch-docs.js +171 -0
package/dist/lib/generate-configs.d.ts +25 -0
package/dist/lib/generate-configs.js +42 -0
package/dist/lib/grader-api.d.ts +21 -0
package/dist/lib/grader-api.js +34 -0
package/dist/lib/grader-compare.d.ts +19 -0
package/dist/lib/grader-compare.js +91 -0
package/dist/lib/grader-consistency.d.ts +27 -0
package/dist/lib/grader-consistency.js +79 -0
package/dist/lib/grader-sensitivity.d.ts +19 -0
package/dist/lib/grader-sensitivity.js +75 -0
package/dist/lib/grader-validate.d.ts +19 -0
package/dist/lib/grader-validate.js +78 -0
package/dist/lib/measure-retrieval.d.ts +14 -0
package/dist/lib/measure-retrieval.js +71 -0
package/dist/lib/pr-comment.d.ts +16 -0
package/dist/lib/pr-comment.js +28 -0
package/dist/lib/readiness-report.d.ts +13 -0
package/dist/lib/readiness-report.js +108 -0
package/dist/lib/webhook-server.d.ts +11 -0
package/dist/lib/webhook-server.js +24 -0
package/dist/lib/weekly-digest.d.ts +24 -0
package/dist/lib/weekly-digest.js +148 -0
package/dist/orchestration/build-app-context.d.ts +27 -0
package/dist/orchestration/build-app-context.js +81 -0
package/dist/orchestration/build-step-sequence.d.ts +15 -0
package/dist/orchestration/build-step-sequence.js +84 -0
package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
package/dist/orchestration/config-to-source-overrides.js +28 -0
package/dist/orchestration/env-bridge.d.ts +21 -0
package/dist/orchestration/env-bridge.js +66 -0
package/dist/orchestration/index.d.ts +11 -0
package/dist/orchestration/index.js +11 -0
package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
package/dist/orchestration/pipeline-orchestrator.js +153 -0
package/dist/orchestration/step-runner.d.ts +20 -0
package/dist/orchestration/step-runner.js +88 -0
package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
package/dist/orchestration/steps/calculate-scores-step.js +95 -0
package/dist/orchestration/steps/callback-step.d.ts +24 -0
package/dist/orchestration/steps/callback-step.js +76 -0
package/dist/orchestration/steps/compare-step.d.ts +14 -0
package/dist/orchestration/steps/compare-step.js +92 -0
package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
package/dist/orchestration/steps/discovery-report-step.js +55 -0
package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
package/dist/orchestration/steps/fetch-docs-step.js +135 -0
package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
package/dist/orchestration/steps/gap-analysis-step.js +136 -0
package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
package/dist/orchestration/steps/generate-configs-step.js +85 -0
package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
package/dist/orchestration/steps/grader-consistency-step.js +64 -0
package/dist/orchestration/steps/index.d.ts +19 -0
package/dist/orchestration/steps/index.js +19 -0
package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
package/dist/orchestration/steps/publish-report-step.js +216 -0
package/dist/orchestration/steps/readiness-step.d.ts +13 -0
package/dist/orchestration/steps/readiness-step.js +91 -0
package/dist/orchestration/steps/report-step.d.ts +12 -0
package/dist/orchestration/steps/report-step.js +49 -0
package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
package/dist/orchestration/steps/run-eval-step.js +195 -0
package/dist/orchestration/steps/validate-step.d.ts +12 -0
package/dist/orchestration/steps/validate-step.js +41 -0
package/dist/pipeline/agent-behavior-report.d.ts +53 -0
package/dist/pipeline/agent-behavior-report.js +132 -0
package/dist/pipeline/attribution.d.ts +47 -0
package/dist/pipeline/attribution.js +226 -0
package/dist/pipeline/baseline.d.ts +37 -0
package/dist/pipeline/baseline.js +141 -0
package/dist/pipeline/cache.d.ts +101 -0
package/dist/pipeline/cache.js +283 -0
package/dist/pipeline/calculate-scores.d.ts +102 -0
package/dist/pipeline/calculate-scores.js +1128 -0
package/dist/pipeline/callback-delivery.d.ts +50 -0
package/dist/pipeline/callback-delivery.js +89 -0
package/dist/pipeline/checks.d.ts +39 -0
package/dist/pipeline/checks.js +280 -0
package/dist/pipeline/classify-url.d.ts +61 -0
package/dist/pipeline/classify-url.js +93 -0
package/dist/pipeline/compare.d.ts +31 -0
package/dist/pipeline/compare.js +208 -0
package/dist/pipeline/coverage-audit.d.ts +39 -0
package/dist/pipeline/coverage-audit.js +165 -0
package/dist/pipeline/degradations.d.ts +85 -0
package/dist/pipeline/degradations.js +242 -0
package/dist/pipeline/discovery-report.d.ts +55 -0
package/dist/pipeline/discovery-report.js +178 -0
package/dist/pipeline/eval-constants.d.ts +68 -0
package/dist/pipeline/eval-constants.js +111 -0
package/dist/pipeline/eval-fingerprint.d.ts +66 -0
package/dist/pipeline/eval-fingerprint.js +175 -0
package/dist/pipeline/expand-tasks.d.ts +220 -0
package/dist/pipeline/expand-tasks.js +421 -0
package/dist/pipeline/failure-modes.d.ts +46 -0
package/dist/pipeline/failure-modes.js +348 -0
package/dist/pipeline/fetch-url-content.d.ts +44 -0
package/dist/pipeline/fetch-url-content.js +93 -0
package/dist/pipeline/gap-analysis.d.ts +48 -0
package/dist/pipeline/gap-analysis.js +231 -0
package/dist/pipeline/generate-configs.d.ts +72 -0
package/dist/pipeline/generate-configs.js +395 -0
package/dist/pipeline/grader-api.d.ts +49 -0
package/dist/pipeline/grader-api.js +200 -0
package/dist/pipeline/grader-compare-runner.d.ts +44 -0
package/dist/pipeline/grader-compare-runner.js +301 -0
package/dist/pipeline/grader-comparison.d.ts +111 -0
package/dist/pipeline/grader-comparison.js +161 -0
package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
package/dist/pipeline/grader-consistency-runner.js +270 -0
package/dist/pipeline/grader-consistency.d.ts +103 -0
package/dist/pipeline/grader-consistency.js +146 -0
package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
package/dist/pipeline/grader-sensitivity-runner.js +282 -0
package/dist/pipeline/grader-sensitivity.d.ts +94 -0
package/dist/pipeline/grader-sensitivity.js +144 -0
package/dist/pipeline/grader-validate-runner.d.ts +38 -0
package/dist/pipeline/grader-validate-runner.js +229 -0
package/dist/pipeline/grader-validation.d.ts +107 -0
package/dist/pipeline/grader-validation.js +169 -0
package/dist/pipeline/map-request-to-config.d.ts +19 -0
package/dist/pipeline/map-request-to-config.js +80 -0
package/dist/pipeline/measure-retrieval.d.ts +59 -0
package/dist/pipeline/measure-retrieval.js +111 -0
package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
package/dist/pipeline/mirror-repo-tasks.js +350 -0
package/dist/pipeline/plan-format.d.ts +33 -0
package/dist/pipeline/plan-format.js +202 -0
package/dist/pipeline/plan.d.ts +169 -0
package/dist/pipeline/plan.js +708 -0
package/dist/pipeline/pr-comment.d.ts +19 -0
package/dist/pipeline/pr-comment.js +502 -0
package/dist/pipeline/probe.d.ts +52 -0
package/dist/pipeline/probe.js +390 -0
package/dist/pipeline/provenance.d.ts +47 -0
package/dist/pipeline/provenance.js +146 -0
package/dist/pipeline/readiness-report.d.ts +87 -0
package/dist/pipeline/readiness-report.js +205 -0
package/dist/pipeline/release-classification.d.ts +54 -0
package/dist/pipeline/release-classification.js +238 -0
package/dist/pipeline/release-report.d.ts +37 -0
package/dist/pipeline/release-report.js +222 -0
package/dist/pipeline/repo-eval-comment.d.ts +37 -0
package/dist/pipeline/repo-eval-comment.js +165 -0
package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
package/dist/pipeline/repo-threshold-evaluator.js +162 -0
package/dist/pipeline/resolve-mappings.d.ts +35 -0
package/dist/pipeline/resolve-mappings.js +72 -0
package/dist/pipeline/retrieval-metrics.d.ts +39 -0
package/dist/pipeline/retrieval-metrics.js +136 -0
package/dist/pipeline/reverse-mapping.d.ts +67 -0
package/dist/pipeline/reverse-mapping.js +88 -0
package/dist/pipeline/schemas.d.ts +9 -0
package/dist/pipeline/schemas.js +9 -0
package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
package/dist/pipeline/steps/calculate-scores-step.js +89 -0
package/dist/pipeline/steps/compare-step.d.ts +18 -0
package/dist/pipeline/steps/compare-step.js +90 -0
package/dist/pipeline/steps/eval-step.d.ts +53 -0
package/dist/pipeline/steps/eval-step.js +347 -0
package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
package/dist/pipeline/steps/fetch-docs-step.js +84 -0
package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
package/dist/pipeline/steps/generate-configs-step.js +98 -0
package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
package/dist/pipeline/steps/grader-consistency-step.js +74 -0
package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
package/dist/pipeline/steps/publish-report-step.js +243 -0
package/dist/pipeline/steps/report-step.d.ts +13 -0
package/dist/pipeline/steps/report-step.js +56 -0
package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
package/dist/pipeline/steps/update-scores-step.js +42 -0
package/dist/pipeline/targeted-loo.d.ts +88 -0
package/dist/pipeline/targeted-loo.js +203 -0
package/dist/pipeline/thresholds.d.ts +27 -0
package/dist/pipeline/thresholds.js +245 -0
package/dist/pipeline/types.d.ts +10 -0
package/dist/pipeline/types.js +10 -0
package/dist/pipeline/validate.d.ts +67 -0
package/dist/pipeline/validate.js +406 -0
package/dist/pipeline/webhook-server.d.ts +37 -0
package/dist/pipeline/webhook-server.js +133 -0
package/dist/report-store.d.ts +84 -0
package/dist/report-store.js +208 -0
package/dist/sanity/client.d.ts +38 -0
package/dist/sanity/client.js +86 -0
package/dist/sanity/portable-text.d.ts +11 -0
package/dist/sanity/portable-text.js +211 -0
package/dist/sanity/queries.d.ts +133 -0
package/dist/sanity/queries.js +300 -0
package/dist/schedules/digest.d.ts +116 -0
package/dist/schedules/digest.js +156 -0
package/dist/schedules/index.d.ts +12 -0
package/dist/schedules/index.js +10 -0
package/dist/schedules/loader.d.ts +31 -0
package/dist/schedules/loader.js +73 -0
package/dist/schedules/schema.d.ts +9 -0
package/dist/schedules/schema.js +9 -0
package/dist/scripts/agent-behavior-report.d.ts +19 -0
package/dist/scripts/agent-behavior-report.js +315 -0
package/dist/scripts/baseline.d.ts +43 -0
package/dist/scripts/baseline.js +267 -0
package/dist/scripts/calculate-scores.d.ts +166 -0
package/dist/scripts/calculate-scores.js +1296 -0
package/dist/scripts/compare.d.ts +22 -0
package/dist/scripts/compare.js +334 -0
package/dist/scripts/coverage-audit.d.ts +44 -0
package/dist/scripts/coverage-audit.js +209 -0
package/dist/scripts/debug-eval.d.ts +19 -0
package/dist/scripts/debug-eval.js +73 -0
package/dist/scripts/discovery-report.d.ts +58 -0
package/dist/scripts/discovery-report.js +250 -0
package/dist/scripts/fetch-docs.d.ts +35 -0
package/dist/scripts/fetch-docs.js +472 -0
package/dist/scripts/generate-configs.d.ts +66 -0
package/dist/scripts/generate-configs.js +459 -0
package/dist/scripts/grader-api.d.ts +27 -0
package/dist/scripts/grader-api.js +206 -0
package/dist/scripts/grader-compare.d.ts +22 -0
package/dist/scripts/grader-compare.js +368 -0
package/dist/scripts/grader-consistency.d.ts +20 -0
package/dist/scripts/grader-consistency.js +313 -0
package/dist/scripts/grader-sensitivity.d.ts +22 -0
package/dist/scripts/grader-sensitivity.js +354 -0
package/dist/scripts/grader-validate.d.ts +19 -0
package/dist/scripts/grader-validate.js +267 -0
package/dist/scripts/measure-retrieval.d.ts +10 -0
package/dist/scripts/measure-retrieval.js +145 -0
package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
package/dist/scripts/pipeline.d.ts +76 -0
package/dist/scripts/pipeline.js +1031 -0
package/dist/scripts/pr-comment.d.ts +10 -0
package/dist/scripts/pr-comment.js +510 -0
package/dist/scripts/readiness-report.d.ts +88 -0
package/dist/scripts/readiness-report.js +342 -0
package/dist/scripts/update-quality-scores.d.ts +15 -0
package/dist/scripts/update-quality-scores.js +184 -0
package/dist/scripts/validate-task-sources.d.ts +21 -0
package/dist/scripts/validate-task-sources.js +210 -0
package/dist/scripts/validate.d.ts +13 -0
package/dist/scripts/validate.js +79 -0
package/dist/scripts/webhook-server.d.ts +26 -0
package/dist/scripts/webhook-server.js +147 -0
package/dist/scripts/weekly-digest.d.ts +24 -0
package/dist/scripts/weekly-digest.js +144 -0
package/dist/sinks/bigquery/index.d.ts +131 -0
package/dist/sinks/bigquery/index.js +222 -0
package/dist/sinks/format-slack.d.ts +64 -0
package/dist/sinks/format-slack.js +306 -0
package/dist/sinks/index.d.ts +23 -0
package/dist/sinks/index.js +18 -0
package/dist/sinks/loader.d.ts +18 -0
package/dist/sinks/loader.js +82 -0
package/dist/sinks/retry.d.ts +24 -0
package/dist/sinks/retry.js +52 -0
package/dist/sinks/schema.d.ts +9 -0
package/dist/sinks/schema.js +9 -0
package/dist/sinks/slack/format.d.ts +65 -0
package/dist/sinks/slack/format.js +327 -0
package/dist/sinks/slack/index.d.ts +27 -0
package/dist/sinks/slack/index.js +78 -0
package/dist/sinks/slack-sink.d.ts +27 -0
package/dist/sinks/slack-sink.js +78 -0
package/dist/sinks/types.d.ts +59 -0
package/dist/sinks/types.js +44 -0
package/dist/sinks/webhook/index.d.ts +19 -0
package/dist/sinks/webhook/index.js +50 -0
package/dist/sinks/webhook-sink.d.ts +19 -0
package/dist/sinks/webhook-sink.js +50 -0
package/dist/sources.d.ts +104 -0
package/dist/sources.js +292 -0
package/dist/webhook/budget.d.ts +42 -0
package/dist/webhook/budget.js +60 -0
package/dist/webhook/debounce.d.ts +67 -0
package/dist/webhook/debounce.js +76 -0
package/dist/webhook/dispatch.d.ts +45 -0
package/dist/webhook/dispatch.js +84 -0
package/dist/webhook/eval-request-handler.d.ts +87 -0
package/dist/webhook/eval-request-handler.js +181 -0
package/dist/webhook/handler.d.ts +88 -0
package/dist/webhook/handler.js +203 -0
package/dist/webhook/index.d.ts +17 -0
package/dist/webhook/index.js +12 -0
package/dist/webhook/types.d.ts +109 -0
package/dist/webhook/types.js +10 -0
package/package.json +72 -0
package/tasks/.expanded.agentic.yaml +51 -0
package/tasks/.expanded.yaml +66 -0
package/tasks/frameworks.yaml +98 -0
package/tasks/functions.yaml +51 -0
package/tasks/groq.yaml +216 -0
package/tasks/nextjs-live.yaml +62 -0
package/tasks/studio-setup.yaml +111 -0
package/tasks/visual-editing.yaml +120 -0

package/dist/webhook/handler.js ADDED Viewed

@@ -0,0 +1,203 @@
+/**
+ * webhook/handler.ts
+ *
+ * Platform-agnostic webhook handler for Sanity content change events.
+ *
+ * Receives Sanity webhook payloads, determines which evaluation areas
+ * are affected by the document change, debounces rapid edits, enforces
+ * daily budget limits, and dispatches scoped evaluations via GitHub Actions.
+ *
+ * The handler persists nothing outside the process: debounce and budget
+ * state is held in memory by the WebhookHandler instance. It can be
+ * mounted in any HTTP framework: Express, Hono, Cloudflare Workers, etc.
+ *
+ * Flow:
+ * 1. Receive Sanity webhook payload
+ * 2. Extract document slug from payload
+ * 3. Look up affected areas via reverse mapping
+ * 4. If no areas affected → ignore (untracked document)
+ * 5. Check daily budget → rate-limit if exceeded
+ * 6. Push slug into debounce window
+ * 7. When debounce window closes → dispatch scoped eval via GitHub Actions
+ *
+ * @see docs/design-docs/report-store/visibility-workflows.md
+ */
+import { assessImpact, buildReverseMapping, } from "../pipeline/reverse-mapping.js";
+import { createBudgetTracker } from "./budget.js";
+import { createDebouncer } from "./debounce.js";
+import { dispatchEvaluation } from "./dispatch.js";
+// ---------------------------------------------------------------------------
+// Defaults
+// ---------------------------------------------------------------------------
+const DEFAULT_DEBOUNCE_MS = 300_000; // 5 minutes
+const DEFAULT_DAILY_BUDGET = 20;
+const DEFAULT_REPO = "sanity-labs/ai-literacy-framework";
+// ---------------------------------------------------------------------------
+// WebhookHandler
+// ---------------------------------------------------------------------------
+/**
+ * A stateful webhook handler that manages debouncing, budgeting,
+ * and evaluation dispatch.
+ *
+ * Create one instance per process and call `handle()` for each
+ * incoming webhook payload.
+ *
+ * ```ts
+ * const handler = new WebhookHandler({
+ *   githubToken: process.env.GITHUB_TOKEN!,
+ *   rootDir: "/path/to/packages/eval",
+ * })
+ *
+ * // In your HTTP handler:
+ * app.post("/webhook", async (req) => {
+ *   const result = await handler.handle(req.body)
+ *   return Response.json(result)
+ * })
+ * ```
+ */
+export class WebhookHandler {
+    budget;
+    config;
+    debouncer;
+    /** Recent dispatch results (for diagnostics) */
+    recentDispatches = [];
+    reverseMapping;
+    constructor(config) {
+        this.config = {
+            dailyBudget: config.dailyBudget ?? DEFAULT_DAILY_BUDGET,
+            debounceMs: config.debounceMs ?? DEFAULT_DEBOUNCE_MS,
+            githubRepo: config.githubRepo ?? DEFAULT_REPO,
+            githubToken: config.githubToken,
+            rootDir: config.rootDir ?? process.cwd(),
+            webhookSecret: config.webhookSecret,
+        };
+        this.reverseMapping = buildReverseMapping(this.config.rootDir);
+        this.budget = createBudgetTracker(this.config.dailyBudget);
+        this.debouncer = createDebouncer(this.config.debounceMs, this.onDebounceFlush.bind(this));
+    }
+    /**
+     * Get handler diagnostics (for health check endpoints).
+     */
+    diagnostics() {
+        const budgetState = this.budget.currentState();
+        return {
+            budget: {
+                count: budgetState.count,
+                limit: budgetState.limit,
+                remaining: this.budget.remaining(),
+            },
+            pendingSlugs: this.debouncer.pending(),
+            recentDispatches: this.recentDispatches.slice(-10),
+            trackedSlugs: this.reverseMapping.size,
+        };
+    }
+    /**
+     * Handle an incoming Sanity webhook payload.
+     *
+     * This is the main entry point — call once per webhook request.
+     * Returns a result indicating what happened (dispatched, debounced,
+     * rate-limited, or ignored).
+     */
+    handle(payload) {
+        // Extract the document slug from the payload
+        const slug = extractSlug(payload);
+        if (!slug) {
+            return {
+                reason: "No document slug in payload",
+                status: "ignored",
+            };
+        }
+        // Look up affected areas
+        const impact = assessImpact([slug], this.reverseMapping);
+        if (impact.areas.length === 0) {
+            return {
+                reason: `Document "${slug}" is not tracked by any evaluation task`,
+                status: "ignored",
+            };
+        }
+        // Check daily budget
+        if (!this.budget.canDispatch()) {
+            const state = this.budget.currentState();
+            return {
+                dailyBudget: state.limit,
+                dailyCount: state.count,
+                status: "rate-limited",
+            };
+        }
+        // Push into debounce window
+        this.debouncer.push(slug);
+        const expiresAt = new Date(Date.now() + this.config.debounceMs).toISOString();
+        return {
+            pendingSlugs: [...this.debouncer.state().slugs],
+            status: "debounced",
+            windowExpiresAt: expiresAt,
+        };
+    }
+    /**
+     * Force-flush the debounce window (for graceful shutdown).
+     */
+    async shutdown() {
+        await this.debouncer.flush();
+    }
+    // -------------------------------------------------------------------------
+    // Private
+    // -------------------------------------------------------------------------
+    /**
+     * Called when the debounce window closes — dispatches a scoped evaluation.
+     */
+    async onDebounceFlush(slugs) {
+        // Re-check budget (may have been exhausted during debounce window)
+        if (!this.budget.canDispatch()) {
+            console.warn(`  ⚠️  Budget exhausted during debounce window. Skipping dispatch for: ${slugs.join(", ")}`);
+            return;
+        }
+        const impact = assessImpact(slugs, this.reverseMapping);
+        if (impact.areas.length === 0)
+            return;
+        console.log(`  📤 Dispatching evaluation for areas [${impact.areas.join(", ")}] ` +
+            `triggered by document changes: ${slugs.join(", ")}`);
+        const result = await dispatchEvaluation({
+            areas: impact.areas,
+            documentSlug: slugs.join(","),
+            taskIds: impact.taskIds,
+        }, {
+            githubToken: this.config.githubToken,
+            repo: this.config.githubRepo,
+        });
+        // Record dispatch
+        this.budget.record();
+        this.recentDispatches.push({
+            areas: impact.areas,
+            result,
+            timestamp: new Date().toISOString(),
+        });
+        // Keep only last 50 dispatches
+        if (this.recentDispatches.length > 50) {
+            this.recentDispatches.splice(0, this.recentDispatches.length - 50);
+        }
+        if (result.ok) {
+            console.log(`  ✅ Dispatch accepted (HTTP ${result.httpStatus})`);
+        }
+        else {
+            console.warn(`  ⚠️  Dispatch failed: ${result.error}`);
+        }
+    }
+}
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+/**
+ * Extract the document slug from a Sanity webhook payload.
+ *
+ * Sanity webhooks include the document in `result` — the slug is at
+ * `result.slug.current` for article documents.
+ */
+function extractSlug(payload) {
+    // Direct slug from projected document
+    if (payload.result?.slug?.current) {
+        return payload.result.slug.current;
+    }
+    // Fallback: extract from _id (e.g., "article-groq-introduction")
+    // This handles cases where the projection doesn't include slug
+    return undefined;
+}

package/dist/webhook/index.d.ts ADDED Viewed

@@ -0,0 +1,17 @@
+/**
+ * webhook/index.ts
+ *
+ * Barrel exports for the webhook event-driven trigger system.
+ *
+ * @see docs/design-docs/report-store/visibility-workflows.md
+ */
+export { handleEvalRequest } from "./eval-request-handler.js";
+export type { EvalRequestHandlerConfig, EvalRequestPayload, EvalRequestResult, } from "./eval-request-handler.js";
+export { createBudgetTracker } from "./budget.js";
+export type { BudgetState, BudgetTracker } from "./budget.js";
+export { createDebouncer } from "./debounce.js";
+export type { Debouncer, DebounceCallback, DebounceState } from "./debounce.js";
+export { dispatchEvaluation } from "./dispatch.js";
+export type { DispatchOptions, DispatchResult } from "./dispatch.js";
+export { WebhookHandler } from "./handler.js";
+export type { DispatchRequest, SanityWebhookPayload, WebhookHandlerConfig, WebhookResult, } from "./types.js";

package/dist/webhook/index.js ADDED Viewed

@@ -0,0 +1,12 @@
+/**
+ * webhook/index.ts
+ *
+ * Barrel exports for the webhook event-driven trigger system.
+ *
+ * @see docs/design-docs/report-store/visibility-workflows.md
+ */
+export { handleEvalRequest } from "./eval-request-handler.js";
+export { createBudgetTracker } from "./budget.js";
+export { createDebouncer } from "./debounce.js";
+export { dispatchEvaluation } from "./dispatch.js";
+export { WebhookHandler } from "./handler.js";

package/dist/webhook/types.d.ts ADDED Viewed

@@ -0,0 +1,109 @@
+/**
+ * webhook/types.ts
+ *
+ * Types for the Sanity Content Lake webhook payload and the
+ * event-driven evaluation trigger system.
+ *
+ * @see https://www.sanity.io/docs/webhooks
+ * @see docs/design-docs/report-store/visibility-workflows.md
+ */
+/**
+ * Evaluation dispatch request (sent to GitHub Actions).
+ *
+ * `areas` and `taskIds` are required; the remaining fields are optional.
+ */
+export interface DispatchRequest {
+    /** Feature areas to evaluate */
+    areas: string[];
+    /** ID of the document that triggered this evaluation */
+    documentId?: string;
+    /**
+     * Slug of the triggering document.
+     *
+     * NOTE(review): the webhook handler passes several coalesced slugs joined
+     * with "," in a `documentSlug` property — presumably this field; confirm
+     * against the `dispatchEvaluation` signature.
+     */
+    documentSlug?: string;
+    /** Evaluation mode (plain string; accepted values are not declared here) */
+    mode?: string;
+    /** Specific task IDs to evaluate */
+    taskIds: string[];
+}
+/**
+ * Sanity webhook payload shape.
+ *
+ * When a GROQ-powered webhook fires, Sanity POSTs a JSON body with
+ * the matching document and operation metadata.
+ *
+ * Only `ids` and `operation` are required by this type; `dataset`,
+ * `projectId` and `result` are optional.
+ */
+export interface SanityWebhookPayload {
+    /** The ID of the dataset */
+    dataset?: string;
+    /**
+     * The IDs of the documents that triggered the webhook, keyed by the kind
+     * of change. Every entry is optional.
+     */
+    ids: {
+        /** The created document ID (for create operations) */
+        created?: string;
+        /** The deleted document ID (for delete operations) */
+        deleted?: string;
+        /** The updated document ID (for update operations) */
+        updated?: string;
+    };
+    /** The operation that triggered the webhook */
+    operation: "create" | "delete" | "update";
+    /** The ID of the project */
+    projectId?: string;
+    /**
+     * The projected document data (shape depends on webhook GROQ projection).
+     *
+     * `_id` and `_type` are always present when `result` is; `slug` is
+     * optional and is where the handler's slug extraction reads
+     * `slug.current`. Any further projected fields are typed as `unknown`.
+     */
+    result?: {
+        _id: string;
+        _type: string;
+        slug?: {
+            current: string;
+        };
+        [key: string]: unknown;
+    };
+}
+/**
+ * Configuration for the webhook handler.
+ *
+ * Only `githubToken` is required; every other field is optional and has the
+ * default documented on it (where one is stated).
+ */
+export interface WebhookHandlerConfig {
+    /**
+     * Budget: maximum evaluations per day.
+     * When exceeded, webhooks are acknowledged but not dispatched.
+     * Default: 20
+     */
+    dailyBudget?: number;
+    /**
+     * Debounce window in milliseconds.
+     * Rapid edits within this window are coalesced into a single evaluation.
+     * Default: 300_000 (5 minutes)
+     */
+    debounceMs?: number;
+    /**
+     * The GitHub repository to dispatch to (owner/repo format).
+     * Default: "sanity-labs/ai-literacy-framework"
+     */
+    githubRepo?: string;
+    /**
+     * GitHub personal access token for dispatching repository_dispatch events.
+     * Required for triggering evaluations.
+     */
+    githubToken: string;
+    /**
+     * Path to the eval package root (for building reverse mappings).
+     * Default: process.cwd()
+     */
+    rootDir?: string;
+    /**
+     * Sanity webhook secret for verifying payload authenticity.
+     * If provided, the handler validates the X-Sanity-Webhook-Signature header.
+     * NOTE(review): presumably no signature check runs when this is omitted —
+     * confirm in the handler implementation.
+     */
+    webhookSecret?: string;
+}
+/**
+ * Result of handling a webhook: a union discriminated by `status`.
+ *
+ * - `"acknowledged"` — carries a `reason` string.
+ * - `"debounced"` — carries the `pendingSlugs` collected so far and
+ *   `windowExpiresAt` (a string; presumably an ISO timestamp — confirm).
+ * - `"dispatched"` — carries the `areas` and `taskIds` that were dispatched.
+ * - `"ignored"` — carries a `reason` string.
+ * - `"rate-limited"` — carries the current `dailyCount` and the configured
+ *   `dailyBudget`.
+ *
+ * Narrow on `status` before reading the variant-specific fields.
+ */
+export type WebhookResult = {
+    status: "acknowledged";
+    reason: string;
+} | {
+    status: "debounced";
+    pendingSlugs: string[];
+    windowExpiresAt: string;
+} | {
+    status: "dispatched";
+    areas: string[];
+    taskIds: string[];
+} | {
+    status: "ignored";
+    reason: string;
+} | {
+    status: "rate-limited";
+    dailyCount: number;
+    dailyBudget: number;
+};

package/dist/webhook/types.js ADDED Viewed

@@ -0,0 +1,10 @@
+/**
+ * webhook/types.ts
+ *
+ * Types for the Sanity Content Lake webhook payload and the
+ * event-driven evaluation trigger system.
+ *
+ * @see https://www.sanity.io/docs/webhooks
+ * @see docs/design-docs/report-store/visibility-workflows.md
+ */
+export {};

package/package.json ADDED Viewed

@@ -0,0 +1,72 @@
+{
+  "name": "@sanity/ailf",
+  "version": "0.1.0",
+  "private": false,
+  "publishConfig": {
+    "access": "restricted"
+  },
+  "license": "MIT",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/sanity-labs/ai-literacy-framework.git",
+    "directory": "packages/eval"
+  },
+  "description": "AI Literacy Framework - Evaluation tool for Sanity documentation",
+  "type": "module",
+  "bin": {
+    "ailf": "./bin/ailf.js"
+  },
+  "files": [
+    "bin",
+    "dist",
+    "config",
+    "canonical",
+    "tasks"
+  ],
+  "scripts": {
+    "build": "tsc && tsx scripts/bundle-workspace-deps.ts",
+    "generate-configs": "tsx src/cli.ts generate-configs",
+    "fetch-docs": "tsx src/cli.ts fetch-docs",
+    "measure-retrieval": "tsx src/cli.ts measure-retrieval",
+    "eval": "tsx src/cli.ts eval",
+    "compare": "tsx src/cli.ts compare",
+    "grader-consistency": "tsx src/cli.ts grader consistency",
+    "grader-validate": "tsx src/cli.ts grader validate",
+    "grader-compare": "tsx src/cli.ts grader compare",
+    "grader-sensitivity": "tsx src/cli.ts grader sensitivity",
+    "calculate-scores": "tsx src/cli.ts calculate-scores",
+    "agent-report": "tsx src/cli.ts agent-report",
+    "share": "dotenv -e ../../.env -o -- promptfoo share",
+    "view": "dotenv -e ../../.env -o -- promptfoo view",
+    "cli": "tsx src/cli.ts",
+    "pipeline": "tsx src/cli.ts pipeline",
+    "validate": "tsx src/cli.ts validate",
+    "test": "tsx --test src/__tests__/*.test.ts",
+    "pr-comment": "tsx src/cli.ts pr-comment",
+    "coverage-audit": "tsx src/cli.ts coverage-audit",
+    "readiness-report": "tsx src/cli.ts readiness-report",
+    "discovery-report": "tsx src/cli.ts discovery-report",
+    "webhook-server": "tsx src/cli.ts webhook-server",
+    "weekly-digest": "tsx src/cli.ts weekly-digest"
+  },
+  "dependencies": {
+    "@google-cloud/bigquery": "^8.1.1",
+    "@inquirer/prompts": "^8.3.0",
+    "@portabletext/markdown": "^1.0.0",
+    "@sanity/client": "^7.3.0",
+    "commander": "^14.0.3",
+    "dotenv": "^16.4.7",
+    "dotenv-cli": "^11.0.0",
+    "js-yaml": "^4.1.0",
+    "promptfoo": "^0.120.24",
+    "zod": "^4.3.6"
+  },
+  "devDependencies": {
+    "@sanity/ailf-core": "workspace:*",
+    "@sanity/ailf-shared": "workspace:*",
+    "@types/js-yaml": "^4.0.9",
+    "@types/node": "^22.13.1",
+    "tsx": "^4.19.2",
+    "typescript": "^5.7.3"
+  }
+}

package/tasks/.expanded.agentic.yaml ADDED Viewed

@@ -0,0 +1,51 @@
+# .expanded.agentic.yaml
+#
+# AUTO-GENERATED — do not edit directly.
+# Gold entries only (no baseline) for agentic evaluation mode.
+# Source: tasks/*.yaml (single-definition format)
+# Run: pnpm generate-configs
+- assert:
+    - type: llm-rubric
+      value: |-
+        Score task completion from 0 to 100:
+        - 0: Couldn't attempt — missing critical information
+        - 20: Attempted but fundamentally wrong approach
+        - 50: Partial implementation — major functional gaps
+        - 80: Mostly complete — minor issues or missing edge cases
+        - 100: Fully functional code — works as expected
+        Must demonstrate:
+        - Configures a GROQ-powered webhook
+        - Webhook triggers on content changes
+        - Includes agent integration concepts
+        Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
+      metadata:
+        dimension: task-completion
+        maxScore: 100
+    - type: contains-any
+      value:
+        - webhook
+        - GROQ
+      weight: 1
+    - type: llm-rubric
+      value: |-
+        Score documentation coverage from 0 to 100:
+        - 0: Had to hallucinate/guess most implementation details
+        - 30: Significant gaps — filled with assumptions
+        - 50: Some gaps — inferred from partial information
+        - 80: Minor gaps — almost everything was documented
+        - 100: Complete coverage — all necessary info was in docs
+        Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
+      metadata:
+        dimension: doc-coverage
+        maxScore: 100
+  description: Test - Perspective ref expansion (gold)
+  vars:
+    docs: file://contexts/canonical/perspective-ref-test.md
+    task: |
+      Build a webhook handler that integrates with an AI agent pipeline.
+      Configure a GROQ-powered webhook that triggers when blog posts are
+      published and sends a payload to an agent endpoint.

package/tasks/.expanded.yaml ADDED Viewed

@@ -0,0 +1,66 @@
+# .expanded.yaml
+#
+# AUTO-GENERATED — do not edit directly.
+# Source: tasks/*.yaml (single-definition format)
+# Run: pnpm generate-configs
+- assert:
+    - type: llm-rubric
+      value: |-
+        Score task completion from 0 to 100:
+        - 0: Couldn't attempt — missing critical information
+        - 20: Attempted but fundamentally wrong approach
+        - 50: Partial implementation — major functional gaps
+        - 80: Mostly complete — minor issues or missing edge cases
+        - 100: Fully functional code — works as expected
+        Must demonstrate:
+        - Configures a GROQ-powered webhook
+        - Webhook triggers on content changes
+        - Includes agent integration concepts
+        Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
+      metadata:
+        dimension: task-completion
+        maxScore: 100
+    - type: contains-any
+      value:
+        - webhook
+        - GROQ
+      weight: 1
+    - type: llm-rubric
+      value: |-
+        Score documentation coverage from 0 to 100:
+        - 0: Had to hallucinate/guess most implementation details
+        - 30: Significant gaps — filled with assumptions
+        - 50: Some gaps — inferred from partial information
+        - 80: Minor gaps — almost everything was documented
+        - 100: Complete coverage — all necessary info was in docs
+        Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
+      metadata:
+        dimension: doc-coverage
+        maxScore: 100
+  description: Test - Perspective ref expansion (gold)
+  prompts:
+    - with-docs
+  vars:
+    docs: file://contexts/canonical/perspective-ref-test.md
+    task: |
+      Build a webhook handler that integrates with an AI agent pipeline.
+      Configure a GROQ-powered webhook that triggers when blog posts are
+      published and sends a payload to an agent endpoint.
+- description: Test - Perspective ref expansion (baseline)
+  prompts:
+    - without-docs
+  vars:
+    docs: ''
+    task: |
+      Build a webhook handler that integrates with an AI agent pipeline.
+      Configure a GROQ-powered webhook that triggers when blog posts are
+      published and sends a payload to an agent endpoint.
+  assert:
+    - type: llm-rubric
+      value: |-
+        Score task completion from 0 to 100 (same criteria as above).
+        Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}

package/tasks/frameworks.yaml ADDED Viewed

@@ -0,0 +1,98 @@
+# tasks/frameworks.yaml
+#
+# Other Framework Integrations — Remix, Nuxt, etc.
+#
+# Each task is defined once. The pipeline auto-generates gold (with docs)
+# and baseline (without docs) variants from each definition.
+# ============================================================
+# TASK: Remix Integration
+# ============================================================
+- id: remix-integration
+  description: 'Frameworks - Remix integration with data fetching'
+  doc_coverage: true
+  # Documentation pages treated as canonical for this task.
+  canonical_docs:
+    - slug: displaying-content-in-a-react-router-front-end
+      reason: 'React Router front-end content display guide'
+    - slug: visual-editing-with-react-router
+      reason: 'React Router / Remix integration with visual editing'
+    - slug: functions-js-client
+      reason: 'Configuring @sanity/client for data fetching'
+  reference_solution: reference-solutions/frameworks/remix.tsx
+  vars:
+    # Prompt text handed to the model under evaluation.
+    task: |
+      Integrate Sanity into a Remix application:
+      1. Set up the Sanity client
+      2. Create a loader that fetches blog posts using GROQ
+      3. Build a route component that renders the fetched posts
+      4. Handle loading and error states properly
+      Provide all necessary files for a working Remix + Sanity integration.
+    docs: file://contexts/canonical/remix-integration.md
+  assert:
+    # Rubric 1: were all required pieces delivered?
+    - type: llm-rubric
+      template: task-completion
+      criteria:
+        - 'Sanity client configuration'
+        - 'Remix loader function with GROQ query'
+        - 'Route component using useLoaderData'
+        - 'Proper typing'
+    # Rubric 2: is the delivered code correct and current?
+    - type: llm-rubric
+      template: code-correctness
+      criteria:
+        - 'Modern Remix patterns (v2 conventions)'
+        - 'Proper loader/component separation'
+        - 'Valid GROQ queries'
+        - 'No deprecated APIs'
+    # Keyword check: any one of these strings satisfies it.
+    - type: contains-any
+      value:
+        - 'useLoaderData'
+        - 'loader'
+      weight: 1
+# ============================================================
+# TASK: Nuxt 4 Integration
+# ============================================================
+- id: nuxt-integration
+  description: "Frameworks - Nuxt 4 integration"
+  doc_coverage: true
+  canonical_docs:
+    - slug: displaying-content-in-nuxt-js
+      reason: "Nuxt.js front-end content display guide"
+    - slug: visual-editing-with-nuxt
+      reason: "Nuxt visual editing integration"
+  reference_solution: reference-solutions/frameworks/nuxt.ts
+  vars:
+    task: |
+      Integrate Sanity into a Nuxt 4 application:
+      1. Install and configure the @nuxtjs/sanity module
+      2. Create a page that fetches and displays blog posts
+      3. Use Nuxt composables for data fetching
+      Provide all necessary configuration and component code.
+    docs: file://contexts/canonical/nuxt-integration.md
+  assert:
+    - type: llm-rubric
+      template: task-completion
+      criteria:
+        - "@nuxtjs/sanity module setup in nuxt.config.ts"
+        - Page component using Nuxt data fetching composables
+        - Sanity GROQ query
+    - type: llm-rubric
+      template: code-correctness
+      criteria:
+        - Nuxt 3 module configuration syntax
+        - Uses useSanityQuery or equivalent composable
+        - Proper Nuxt 3 patterns (not Nuxt 2)
+    - type: contains-any
+      value:
+        - "@nuxtjs/sanity"
+        - "useSanityQuery"
+        - "sanity:"
+      weight: 1