@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/validate.ts
|
|
3
|
+
*
|
|
4
|
+
* Configuration validation for the evaluation pipeline. Checks that all YAML
|
|
5
|
+
* config files are consistent: every task has a canonical mapping, every
|
|
6
|
+
* mapping has a reference solution file, required files exist, etc.
|
|
7
|
+
*
|
|
8
|
+
* All individual validators are exported so they can be tested independently.
|
|
9
|
+
*/
|
|
10
|
+
import fs from "fs";
|
|
11
|
+
import path from "path";
|
|
12
|
+
import { load } from "js-yaml";
|
|
13
|
+
import { resolveMappings } from "./resolve-mappings.js";
|
|
14
|
+
import { FeatureRegistrySchema, formatZodErrors, RubricConfigSchema, TaskFileSchema, ThresholdConfigSchema, } from "./schemas.js";
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Helpers
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
/**
 * Entry point for configuration validation.
 *
 * Resolves `rootDir` to an absolute path, invokes every individual validator
 * against it in a fixed order, and concatenates the issues they report.
 *
 * @param rootDir - Project root directory; resolved with `path.resolve`.
 * @returns `{ issues, valid }`, where `valid` is true exactly when none of the
 *   collected issues has `severity === "error"`.
 */
export function validateConfiguration(rootDir) {
    const baseDir = path.resolve(rootDir);
    // The order here determines the order of issues in the combined result.
    const checks = [
        validateModelsYaml,
        validateRubricsYaml,
        validateFeaturesYaml,
        validateThresholdsYaml,
        validateTaskFiles,
        validateReferenceSolutions,
        validateContexts,
    ];
    const issues = [];
    for (const runCheck of checks) {
        for (const found of runCheck(baseDir)) {
            issues.push(found);
        }
    }
    const valid = !issues.some((entry) => entry.severity === "error");
    return { issues, valid };
}
|
|
36
|
+
/**
 * Verify that a canonical context file exists for every task that appears in
 * the resolved mappings.
 *
 * For each task with an `id`, the file `contexts/canonical/<id>.md` under
 * `rootDir` is expected. Because these files are produced by fetch-docs and
 * may legitimately be absent, every finding is reported as a warning rather
 * than an error.
 *
 * @param rootDir - Project root directory containing `contexts/canonical/`.
 * @returns An array of warning issues; empty when nothing is missing. When
 *   the canonical directory itself is absent, a single warning is returned
 *   and the mappings are not consulted.
 */
export function validateContexts(rootDir) {
    const source = "validateContexts";
    const canonicalDir = path.join(rootDir, "contexts", "canonical");
    // Without the directory there is nothing to check per task; report once.
    if (!fs.existsSync(canonicalDir)) {
        return [
            warning(source, "contexts/canonical/ directory not found — run 'pnpm fetch-docs' to generate", canonicalDir),
        ];
    }
    const missing = [];
    const mappings = resolveMappings(rootDir);
    for (const areaConfig of Object.values(mappings.feature_areas)) {
        // Areas without a (truthy) task list contribute nothing.
        const areaTasks = areaConfig?.tasks || [];
        for (const task of areaTasks) {
            // Tasks lacking an id cannot be mapped to a context file name.
            if (!task.id) {
                continue;
            }
            const contextFile = path.join(canonicalDir, `${task.id}.md`);
            if (fs.existsSync(contextFile)) {
                continue;
            }
            missing.push(warning(source, `Missing canonical context for task '${task.id}' — run 'pnpm fetch-docs' to generate`, contextFile));
        }
    }
    return missing;
}
|
|
66
|
+
/**
 * Validate config/features.yaml: it must parse as YAML and conform to
 * FeatureRegistrySchema. On top of the structural check this function
 *
 *  - cross-references each feature's `status`/`area` against the YAML files
 *    present in tasks/ (only when that directory exists), and
 *  - reports duplicate feature ids.
 *
 * A missing file yields a single warning (not an error) — the feature
 * registry is optional and doesn't block evaluation.
 *
 * @param rootDir Root directory containing config/ and tasks/.
 * @returns The list of validation issues found (empty when everything is fine).
 */
export function validateFeaturesYaml(rootDir) {
    const source = "validateFeaturesYaml";
    const issues = [];
    const filePath = path.join(rootDir, "config", "features.yaml");
    if (!fs.existsSync(filePath)) {
        // Feature registry is optional — warn, don't error
        issues.push(warning(source, "config/features.yaml not found — coverage audit unavailable", filePath));
        return issues;
    }
    const result = parseYamlFile(filePath, source);
    if (!result.ok)
        return [result.issue];
    const zodResult = FeatureRegistrySchema.safeParse(result.data);
    if (!zodResult.success) {
        for (const line of formatZodErrors(zodResult.error)) {
            issues.push(error(source, `config/features.yaml: ${line.trim()}`, filePath));
        }
        return issues;
    }
    const { features } = zodResult.data;
    // Cross-reference against task files. This part needs tasks/ to exist;
    // without it there is nothing to compare the registry with.
    const tasksDir = path.join(rootDir, "tasks");
    if (fs.existsSync(tasksDir)) {
        const taskFiles = new Set(fs
            .readdirSync(tasksDir)
            .filter((f) => f.endsWith(".yaml") || f.endsWith(".yml"))
            .map((f) => f.replace(/\.(yaml|yml)$/, "")));
        for (const feature of features) {
            if (!feature.area)
                continue;
            const hasTaskFile = taskFiles.has(feature.area);
            // Warn if a "covered" feature has no matching task file
            if (feature.status === "covered" && !hasTaskFile) {
                issues.push(warning(source, `Feature '${feature.id}' is status: covered with area: '${feature.area}', but tasks/${feature.area}.yaml does not exist`, filePath));
            }
            // Warn if an "uncovered" feature has a matching task file
            if (feature.status === "uncovered" && hasTaskFile) {
                issues.push(warning(source, `Feature '${feature.id}' is status: uncovered but tasks/${feature.area}.yaml exists — consider updating status to 'covered'`, filePath));
            }
        }
    }
    // Duplicate feature ids are a property of features.yaml alone, so this
    // check runs whether or not tasks/ exists.
    const ids = new Set();
    for (const feature of features) {
        if (ids.has(feature.id)) {
            issues.push(error(source, `Duplicate feature id '${feature.id}' in config/features.yaml`, filePath));
        }
        ids.add(feature.id);
    }
    return issues;
}
|
|
124
|
+
/**
 * Validate config/models.yaml: the file must exist and parse to an object,
 * contain a non-empty `models` array whose entries each carry an `id` and a
 * `label`, and define a `grader` section with an `id`.
 *
 * @param rootDir Root directory containing config/.
 * @returns The list of validation issues found (empty when everything is fine).
 */
export function validateModelsYaml(rootDir) {
    const source = "validateModelsYaml";
    const issues = [];
    const filePath = path.join(rootDir, "config", "models.yaml");
    const result = parseYamlFile(filePath, source);
    if (!result.ok)
        return [result.issue];
    const { data } = result;
    if (!data || typeof data !== "object") {
        issues.push(error(source, "config/models.yaml did not parse to an object", filePath));
        return issues;
    }
    // Check models array
    if (!Array.isArray(data.models)) {
        issues.push(error(source, "config/models.yaml is missing a 'models' array", filePath));
    }
    else if (data.models.length === 0) {
        issues.push(error(source, "config/models.yaml has an empty 'models' array", filePath));
    }
    else {
        for (const [i, model] of data.models.entries()) {
            // An empty list item (`- `) parses to null and a bare scalar to a
            // string/number; report those instead of crashing on `model.id`.
            if (!model || typeof model !== "object") {
                issues.push(error(source, `models[${i}] is not an object`, filePath));
                continue;
            }
            if (!model.id) {
                issues.push(error(source, `models[${i}] is missing 'id'`, filePath));
            }
            if (!model.label) {
                issues.push(error(source, `models[${i}] is missing 'label'`, filePath));
            }
        }
    }
    // Check grader
    if (!data.grader) {
        issues.push(error(source, "config/models.yaml is missing a 'grader' section", filePath));
    }
    else if (!data.grader.id) {
        issues.push(error(source, "config/models.yaml grader is missing 'id'", filePath));
    }
    return issues;
}
|
|
166
|
+
// ---------------------------------------------------------------------------
|
|
167
|
+
// Validators
|
|
168
|
+
// ---------------------------------------------------------------------------
|
|
169
|
+
/**
 * Verify the reference solutions of every task returned by resolveMappings.
 *
 * Two kinds of errors are reported per task:
 *  - the task does not declare a `reference_solution` at all, or
 *  - the declared path, resolved relative to `<rootDir>/canonical`, does not
 *    exist on disk.
 *
 * NOTE: When tasks come from the Content Lake, reference solutions also
 * live there (as ailf.referenceSolution documents). This validator only
 * applies to YAML-based tasks with local file paths.
 */
export function validateReferenceSolutions(rootDir) {
    const source = "validateReferenceSolutions";
    const canonicalDir = path.join(rootDir, "canonical");
    const { feature_areas: featureAreas } = resolveMappings(rootDir);
    const findings = [];
    for (const [area, areaConfig] of Object.entries(featureAreas)) {
        // Areas without a task list have nothing to verify.
        for (const task of areaConfig?.tasks || []) {
            const taskLabel = `${area}/${task.id}`;
            const declaredPath = task.reference_solution;
            if (declaredPath) {
                const solutionPath = path.join(canonicalDir, declaredPath);
                if (!fs.existsSync(solutionPath)) {
                    findings.push(error(source, `${taskLabel} reference_solution not found: ${declaredPath}`, solutionPath));
                }
            }
            else {
                findings.push(error(source, `${taskLabel} is missing 'reference_solution'`));
            }
        }
    }
    return findings;
}
|
|
200
|
+
/**
 * Validate config/rubrics.yaml: the file must exist, parse as YAML, and
 * conform to RubricConfigSchema.
 *
 * Returns the list of validation issues — one error per formatted schema
 * violation, or the single parse/not-found issue from parseYamlFile. The
 * list is empty when the file is valid.
 */
export function validateRubricsYaml(rootDir) {
    const source = "validateRubricsYaml";
    const filePath = path.join(rootDir, "config", "rubrics.yaml");
    const parsed = parseYamlFile(filePath, source);
    if (!parsed.ok) {
        return [parsed.issue];
    }
    const checked = RubricConfigSchema.safeParse(parsed.data);
    if (checked.success) {
        return [];
    }
    // One error issue per formatted schema violation, in the order reported.
    return Array.from(formatZodErrors(checked.error), (line) => error(source, `config/rubrics.yaml: ${line.trim()}`, filePath));
}
|
|
221
|
+
/**
 * Validate every task definition file directly under tasks/.
 *
 * For each non-hidden *.yaml / *.yml file the function
 *  1. parses the YAML and requires the top level to be an array,
 *  2. validates the array against `TaskFileSchema`, and
 *  3. for entries that carry a string `id`, runs cross-entry checks:
 *     duplicate ids across all files (error), a `vars.docs` path that does
 *     not match the id (warning), and `llm-rubric` assertions naming a
 *     template that is absent from config/rubrics.yaml (error; skipped when
 *     no template keys could be loaded).
 *
 * A missing tasks/ directory or a directory without task files yields a
 * single warning.
 */
export function validateTaskFiles(rootDir) {
    const source = "validateTaskFiles";
    const tasksDir = path.join(rootDir, "tasks");
    if (!fs.existsSync(tasksDir)) {
        return [warning(source, "tasks/ directory not found (using Content Lake tasks?)", tasksDir)];
    }
    const taskFileNames = fs
        .readdirSync(tasksDir)
        .filter((name) => !name.startsWith(".") && /\.ya?ml$/.test(name));
    if (taskFileNames.length === 0) {
        return [warning(source, "No task YAML files found in tasks/ (using Content Lake tasks?)", tasksDir)];
    }
    const findings = [];
    const firstFileById = new Map(); // id → file that first declared it
    const knownTemplates = loadTemplateKeys(rootDir);
    for (const file of taskFileNames) {
        const filePath = path.join(tasksDir, file);
        // Parse
        const parsed = parseYamlFile(filePath, source);
        if (!parsed.ok) {
            findings.push(parsed.issue);
            continue;
        }
        if (!Array.isArray(parsed.data)) {
            findings.push(error(source, `${file} did not parse to an array of tasks`, filePath));
            continue;
        }
        // Structural validation
        const checked = TaskFileSchema.safeParse(parsed.data);
        if (!checked.success) {
            for (const line of formatZodErrors(checked.error)) {
                findings.push(error(source, `${file}: ${line.trim()}`, filePath));
            }
            continue;
        }
        // Cross-entry validation — only entries with a string id take part
        for (const entry of checked.data) {
            if (!("id" in entry) || typeof entry.id !== "string") {
                continue;
            }
            const taskId = entry.id;
            // Ids must be unique across all task files
            if (firstFileById.has(taskId)) {
                findings.push(error(source, `${file}: duplicate id '${taskId}' (also in ${firstFileById.get(taskId)})`, filePath));
            }
            else {
                firstFileById.set(taskId, file);
            }
            // The docs path is expected to be derived from the task id
            const docsPath = entry.vars.docs;
            const expectedPath = `file://contexts/canonical/${taskId}.md`;
            if (docsPath && typeof docsPath === "string" && docsPath !== expectedPath) {
                findings.push(warning(source, `${file}: id is '${taskId}' but docs path is '${docsPath}' (expected '${expectedPath}')`, filePath));
            }
            // llm-rubric assertions must name a template from config/rubrics.yaml
            const assertions = entry.assert;
            if (!Array.isArray(assertions) || knownTemplates.size === 0) {
                continue;
            }
            for (const assertion of assertions) {
                const namesTemplate = assertion.type === "llm-rubric" && typeof assertion.template === "string";
                if (namesTemplate && !knownTemplates.has(assertion.template)) {
                    findings.push(error(source, `${file}: task '${taskId}' references unknown rubric template '${assertion.template}' (available: ${[...knownTemplates].join(", ")})`, filePath));
                }
            }
        }
    }
    return findings;
}
|
|
302
|
+
/**
 * Validate config/thresholds.yaml: it must parse as YAML and conform to
 * ThresholdConfigSchema. When the file declares per-area overrides and a
 * tasks/ directory exists, each override is also checked for a matching
 * tasks/<area>.yaml (or .yml) file; a missing one is a warning.
 *
 * A missing thresholds file yields a single warning (not an error) —
 * thresholds are optional and don't block evaluation. They only activate
 * when `--readiness` or severity-aware sink routing is used.
 */
export function validateThresholdsYaml(rootDir) {
    const source = "validateThresholdsYaml";
    const filePath = path.join(rootDir, "config", "thresholds.yaml");
    if (!fs.existsSync(filePath)) {
        // Optional file: its absence is only worth a warning
        return [
            warning(source, "config/thresholds.yaml not found — readiness gates and threshold alerts unavailable", filePath),
        ];
    }
    const parsed = parseYamlFile(filePath, source);
    if (!parsed.ok) {
        return [parsed.issue];
    }
    const findings = [];
    const checked = ThresholdConfigSchema.safeParse(parsed.data);
    if (!checked.success) {
        for (const line of formatZodErrors(checked.error)) {
            findings.push(error(source, `config/thresholds.yaml: ${line.trim()}`, filePath));
        }
        return findings;
    }
    // Cross-reference area overrides against the task files on disk
    const { areas } = checked.data;
    const tasksDir = path.join(rootDir, "tasks");
    if (!areas || !fs.existsSync(tasksDir)) {
        return findings;
    }
    const areasWithTasks = new Set();
    for (const name of fs.readdirSync(tasksDir)) {
        if (name.endsWith(".yaml")) {
            areasWithTasks.add(name.slice(0, -".yaml".length));
        }
        else if (name.endsWith(".yml")) {
            areasWithTasks.add(name.slice(0, -".yml".length));
        }
    }
    for (const areaName of Object.keys(areas)) {
        if (areasWithTasks.has(areaName)) {
            continue;
        }
        findings.push(warning(source, `config/thresholds.yaml: area override '${areaName}' has no matching tasks/${areaName}.yaml`, filePath));
    }
    return findings;
}
|
|
346
|
+
/**
 * Build a validation issue with severity "error".
 *
 * The `path` property is only present when a truthy `filePath` is given.
 */
function error(source, message, filePath) {
    return filePath
        ? { message, severity: "error", source, path: filePath }
        : { message, severity: "error", source };
}
|
|
354
|
+
/**
 * Collect the rubric template names declared under `templates` in
 * config/rubrics.yaml.
 *
 * Best-effort by design: a missing file, unreadable/unparseable YAML, or a
 * `templates` value that is not an object all produce an empty set.
 */
function loadTemplateKeys(rootDir) {
    const rubricsPath = path.join(rootDir, "config", "rubrics.yaml");
    let keys = [];
    if (fs.existsSync(rubricsPath)) {
        try {
            const templates = load(fs.readFileSync(rubricsPath, "utf-8"))?.templates;
            if (templates && typeof templates === "object") {
                keys = Object.keys(templates);
            }
        }
        catch {
            // Deliberately ignored — structural errors are reported by validateRubricsYaml
        }
    }
    return new Set(keys);
}
|
|
375
|
+
/**
 * Read and parse a YAML file without throwing.
 *
 * Returns `{ data, ok: true }` on success. When the file does not exist or
 * reading/parsing fails, returns `{ issue, ok: false }` where `issue` is an
 * error-severity validation issue attributed to `source`.
 */
function parseYamlFile(filePath, source) {
    const failure = (message) => ({
        issue: error(source, message, filePath),
        ok: false,
    });
    if (!fs.existsSync(filePath)) {
        return failure(`File not found: ${filePath}`);
    }
    try {
        return { data: load(fs.readFileSync(filePath, "utf-8")), ok: true };
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : "Unknown YAML parse error";
        return failure(`Failed to parse YAML: ${reason}`);
    }
}
|
|
396
|
+
// ---------------------------------------------------------------------------
|
|
397
|
+
// Main entry point
|
|
398
|
+
// ---------------------------------------------------------------------------
|
|
399
|
+
/**
 * Build a validation issue with severity "warning".
 *
 * The `path` property is only present when a truthy `filePath` is given.
 */
function warning(source, message, filePath) {
    return filePath
        ? { message, severity: "warning", source, path: filePath }
        : { message, severity: "warning", source };
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* webhook-server.ts
|
|
3
|
+
*
|
|
4
|
+
* Local development server for testing the webhook handler.
|
|
5
|
+
*
|
|
6
|
+
* Starts an HTTP server that receives Sanity webhook payloads, processes
|
|
7
|
+
* them through the WebhookHandler, and logs results. Useful for local
|
|
8
|
+
* development and testing the full event-driven trigger flow.
|
|
9
|
+
*
|
|
10
|
+
* Migrated from lib/webhook-server.ts — accepts rootDir and port as parameters.
|
|
11
|
+
*
|
|
12
|
+
* @see docs/design-docs/report-store/visibility-workflows.md
|
|
13
|
+
*/
|
|
14
|
+
import { createServer } from "http";
|
|
15
|
+
import { WebhookHandler } from "../webhook/handler.js";
|
|
16
|
+
/** Options accepted by `startWebhookServer`. Only `rootDir` is required. */
export interface WebhookServerOptions {
    /** Root directory of the eval package */
    rootDir: string;
    /** Server port (default: 3333) */
    port?: number;
    /** GitHub token for dispatching workflow runs (omit for dry-run) */
    githubToken?: string;
    /** Daily dispatch budget (default: 20) */
    dailyBudget?: number;
    /** Debounce interval in ms (default: 10000) */
    debounceMs?: number;
}
|
|
28
|
+
/**
 * Start the webhook development server.
 *
 * The call itself returns right after the server has been asked to listen;
 * it does not block. It also registers a SIGINT handler that shuts the
 * WebhookHandler down and exits the process.
 *
 * @param options Server configuration; see WebhookServerOptions.
 * @returns The HTTP server instance (for graceful shutdown in tests)
 *   together with the WebhookHandler that processes incoming payloads.
 */
export declare function startWebhookServer(options: WebhookServerOptions): {
    server: ReturnType<typeof createServer>;
    handler: WebhookHandler;
};
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* webhook-server.ts
|
|
3
|
+
*
|
|
4
|
+
* Local development server for testing the webhook handler.
|
|
5
|
+
*
|
|
6
|
+
* Starts an HTTP server that receives Sanity webhook payloads, processes
|
|
7
|
+
* them through the WebhookHandler, and logs results. Useful for local
|
|
8
|
+
* development and testing the full event-driven trigger flow.
|
|
9
|
+
*
|
|
10
|
+
* Migrated from lib/webhook-server.ts — accepts rootDir and port as parameters.
|
|
11
|
+
*
|
|
12
|
+
* @see docs/design-docs/report-store/visibility-workflows.md
|
|
13
|
+
*/
|
|
14
|
+
import { createServer } from "http";
|
|
15
|
+
import { allTrackedSlugs, buildReverseMapping } from "./reverse-mapping.js";
|
|
16
|
+
import { WebhookHandler } from "../webhook/handler.js";
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Server
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
/**
 * Start the webhook development server.
 *
 * Creates a WebhookHandler, starts an HTTP server on `port`, prints a
 * startup banner once the server is listening, and registers a SIGINT
 * handler that flushes the handler before exiting. The call returns
 * immediately; it is the listening server that keeps the process alive.
 *
 * @param options Server configuration. Defaults: port 3333, dailyBudget 20,
 *   debounceMs 10000. An empty/omitted `githubToken` switches to dry-run mode.
 * @returns `{ server, handler }` — the HTTP server instance (for graceful
 *   shutdown in tests) and the WebhookHandler behind it.
 */
export function startWebhookServer(options) {
    const { rootDir, port = 3333, githubToken = "", dailyBudget = 20, debounceMs = 10000, } = options;
    const DRY_RUN = !githubToken;
    const handler = new WebhookHandler({
        dailyBudget,
        debounceMs,
        githubToken,
        rootDir,
    });
    const server = createServer((req, res) => {
        // handleRequest is async: a rejection (unreadable body, handler or
        // mapping failure, malformed URL) must not be dropped, otherwise the
        // client hangs and Node reports an unhandled rejection.
        handleRequest(req, res, { handler, rootDir, port, DRY_RUN }).catch((err) => {
            console.error(" Request failed:", err);
            if (res.writableEnded) {
                return;
            }
            if (res.headersSent) {
                // Too late for a status line; just terminate the response.
                res.end();
                return;
            }
            res.writeHead(500, { "Content-Type": "application/json" });
            res.end(JSON.stringify({ error: "Internal server error" }));
        });
    });
    server.listen(port, () => {
        const reverseMapping = buildReverseMapping(rootDir);
        const slugCount = allTrackedSlugs(reverseMapping).length;
        console.log();
        console.log("=== AILF Webhook Server ===");
        console.log();
        console.log(` Port: ${port}`);
        console.log(` Mode: ${DRY_RUN ? "DRY RUN (set GITHUB_TOKEN to dispatch)" : "LIVE"}`);
        console.log(` Tracked slugs: ${slugCount}`);
        // Report the configured debounce interval (the line is labelled
        // "Debounce", so the pending-slug count does not belong here).
        console.log(` Debounce: ${debounceMs}ms`);
        console.log();
        console.log(" Endpoints:");
        console.log(` POST http://localhost:${port}/webhook — handle webhook`);
        console.log(` GET http://localhost:${port}/health — diagnostics`);
        console.log(` GET http://localhost:${port}/mappings — slug → area map`);
        console.log();
    });
    // Graceful shutdown. `once` keeps repeated calls (tests) from stacking
    // listeners and lets a second Ctrl+C terminate the process the default way.
    process.once("SIGINT", () => {
        console.log("\n Shutting down — flushing debounce window...");
        void Promise.resolve()
            .then(() => handler.shutdown())
            .catch((err) => {
            // A failed flush must not leave the process hanging.
            console.error(" Shutdown failed:", err);
            process.exitCode = 1;
        })
            .finally(() => {
            server.close();
            process.exit();
        });
    });
    return { server, handler };
}
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
// Request handler
|
|
67
|
+
// ---------------------------------------------------------------------------
|
|
68
|
+
/**
 * Route a single HTTP request of the webhook dev server.
 *
 * Routes: OPTIONS (any path) → empty 200; GET /health → handler diagnostics;
 * GET /mappings → slug → area map; POST /webhook → parse the JSON body and
 * pass it to the WebhookHandler; anything else → 404 with the endpoint list.
 * Permissive CORS headers are set on every response.
 *
 * `ctx` carries `handler`, `rootDir`, `port` and the `DRY_RUN` flag.
 */
async function handleRequest(req, res, ctx) {
    const { pathname } = new URL(req.url ?? "/", `http://localhost:${ctx.port}`);
    const { method } = req;
    // Writes a JSON response; indent 0 produces compact output.
    const sendJson = (status, payload, indent = 0) => {
        res.writeHead(status, { "Content-Type": "application/json" });
        res.end(JSON.stringify(payload, null, indent));
    };
    // CORS headers for local dev
    const corsHeaders = [
        ["Access-Control-Allow-Origin", "*"],
        ["Access-Control-Allow-Methods", "GET, POST, OPTIONS"],
        ["Access-Control-Allow-Headers", "Content-Type"],
    ];
    for (const [header, value] of corsHeaders) {
        res.setHeader(header, value);
    }
    // Preflight
    if (method === "OPTIONS") {
        res.writeHead(200);
        res.end();
        return;
    }
    // Health check
    if (method === "GET" && pathname === "/health") {
        const diagnostics = ctx.handler.diagnostics();
        sendJson(200, { dryRun: ctx.DRY_RUN, ...diagnostics }, 2);
        return;
    }
    // Mappings
    if (method === "GET" && pathname === "/mappings") {
        const reverseMapping = buildReverseMapping(ctx.rootDir);
        const slugs = allTrackedSlugs(reverseMapping);
        const pairs = slugs.map((slug) => [slug, reverseMapping.get(slug)]);
        sendJson(200, { mappings: Object.fromEntries(pairs), slugCount: slugs.length }, 2);
        return;
    }
    // Webhook handler
    if (method === "POST" && pathname === "/webhook") {
        const rawBody = await readBody(req);
        let payload;
        try {
            payload = JSON.parse(rawBody);
        }
        catch {
            sendJson(400, { error: "Invalid JSON" });
            return;
        }
        if (ctx.DRY_RUN) {
            console.log("\n 🔶 DRY RUN — would dispatch (no GITHUB_TOKEN set)");
        }
        const outcome = ctx.handler.handle(payload);
        console.log(` → ${outcome.status}:`, JSON.stringify(outcome));
        sendJson(200, outcome, 2);
        return;
    }
    // 404
    sendJson(404, {
        endpoints: ["POST /webhook", "GET /health", "GET /mappings"],
        error: "Not found",
    });
}
|
|
123
|
+
// ---------------------------------------------------------------------------
|
|
124
|
+
// Helpers
|
|
125
|
+
// ---------------------------------------------------------------------------
|
|
126
|
+
/**
 * Collect the whole request body and resolve with it as a string.
 *
 * Chunks from "data" events are buffered and concatenated on "end"; an
 * "error" event rejects the promise.
 */
function readBody(req) {
    const received = [];
    return new Promise((resolve, reject) => {
        req.on("error", reject);
        req.on("end", () => {
            resolve(Buffer.concat(received).toString());
        });
        req.on("data", (piece) => {
            received.push(piece);
        });
    });
}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* report-store.ts
|
|
3
|
+
*
|
|
4
|
+
* Persistent report store backed by the Sanity Content Lake.
|
|
5
|
+
* Provides write, read, and lineage-query operations for evaluation reports.
|
|
6
|
+
*
|
|
7
|
+
* The Sanity Content Lake is the system of record (P1: immutable events).
|
|
8
|
+
* Reports are stored as typed documents in the shared dataset, enabling
|
|
9
|
+
* GROQ queries, Studio dashboards, and webhook-driven notifications.
|
|
10
|
+
*
|
|
11
|
+
* If Sanity is unreachable, the store logs a warning and returns gracefully
|
|
12
|
+
* (P5: local-first — the pipeline never fails because of a store write).
|
|
13
|
+
*
|
|
14
|
+
* @see docs/design-docs/report-store/architecture.md
|
|
15
|
+
* @see docs/design-docs/report-store/domain-model.md
|
|
16
|
+
*/
|
|
17
|
+
import type { SanityClient } from "@sanity/client";
|
|
18
|
+
import type { ComparisonReport, ISOTimestamp, LineageQuery, Report, ReportId, ReportProvenance, ScoreSummary } from "./pipeline/types.js";
|
|
19
|
+
/** Options accepted by the `ReportStore` constructor. All fields are optional. */
export interface ReportStoreOptions {
    /** Override the Sanity client (for testing) */
    client?: SanityClient;
    /** Sanity dataset name */
    dataset?: string;
    /** Sanity project ID */
    projectId?: string;
    /** Sanity API token (required for writes) */
    token?: string;
}
|
|
29
|
+
/**
 * Persistent store for evaluation reports, backed by the Sanity Content Lake.
 *
 * Every operation is asynchronous and declared to resolve to `null` rather
 * than a value in some cases; the individual method docs state which of
 * those cases are failures that are logged instead of thrown.
 */
export declare class ReportStore {
    /** Sanity client used for all reads and writes. */
    private readonly client;
    /**
     * @param options Optional connection settings; see ReportStoreOptions.
     *   NOTE(review): how defaults are resolved when fields are omitted is
     *   not visible in this declaration — confirm against the implementation.
     */
    constructor(options?: ReportStoreOptions);
    /**
     * Auto-compare: find the most recent comparable report and compute
     * a ComparisonReport using the existing compare() primitive.
     *
     * @returns The comparison report, or null if no baseline found or on error
     */
    autoCompare(currentSummary: ScoreSummary, provenance: ReportProvenance, completedAt: ISOTimestamp): Promise<ComparisonReport | null>;
    /**
     * Find a report by its evaluation fingerprint (cross-environment cache lookup).
     *
     * Returns the most recent non-debug report whose provenance contains a
     * matching `evalFingerprint`. Used by the pipeline to skip the expensive
     * eval step when identical inputs have already been evaluated.
     *
     * @returns The cached report, or null if no match or on error
     * @see docs/design-docs/content-lake-eval-caching.md
     */
    findByFingerprint(fingerprint: string): Promise<null | Report>;
    /**
     * Find the most recent comparable report for auto-comparison.
     *
     * Uses GROQ to match on key provenance dimensions (mode, source name)
     * and returns the most recent report before the given timestamp.
     *
     * "Comparable" means: same evaluation mode + same source name.
     * More granular matching (areas, models) can be added as needed.
     *
     * @returns The baseline report, or null (presumably when nothing matches
     *   or the query fails — confirm against the implementation)
     * @see docs/design-docs/report-store/architecture.md — Auto-comparison
     */
    findComparableBaseline(query: LineageQuery): Promise<null | Report>;
    /**
     * Read a report by its ID.
     *
     * @returns The report, or null (presumably when no report has that ID or
     *   the read fails — confirm against the implementation)
     */
    read(id: ReportId): Promise<null | Report>;
    /**
     * Write a report to the Sanity Content Lake.
     *
     * Creates an immutable `ailf.report` document. The document _id is
     * prefixed with `report-` for easy GROQ filtering.
     *
     * @returns The report ID on success, null on failure (logged, not thrown)
     */
    write(report: Report): Promise<null | ReportId>;
}
|
|
76
|
+
/**
 * Generate a UUID v7 (time-sortable) for report identification.
 *
 * UUID v7 encodes the current timestamp in the high bits, making reports
 * naturally sort by creation time in both GROQ and string comparisons.
 *
 * Uses crypto.randomUUID() as a base and overwrites the timestamp portion.
 *
 * @returns A newly generated report ID.
 */
export declare function generateReportId(): ReportId;
|