@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sinks/format-slack.ts
|
|
3
|
+
*
|
|
4
|
+
* Formats evaluation report data into Slack Block Kit structures for the
|
|
5
|
+
* SlackSink. Provides two message formats:
|
|
6
|
+
*
|
|
7
|
+
* - `formatRegressionAlert` — detailed regression notification with
|
|
8
|
+
* per-area dimension breakdowns
|
|
9
|
+
* - `formatScoreSummary` — compact score overview for general reporting
|
|
10
|
+
*
|
|
11
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
12
|
+
*/
|
|
13
|
+
// ---------------------------------------------------------------------------
|
|
14
|
+
// Helpers
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
/**
|
|
17
|
+
* Format a regression alert for areas that have regressed.
|
|
18
|
+
*
|
|
19
|
+
* Produces a rich Slack message with:
|
|
20
|
+
* - Header with overall score change
|
|
21
|
+
* - Context metadata (mode, source, timestamp, promptfoo link)
|
|
22
|
+
* - Per-area regression details with dimension breakdowns
|
|
23
|
+
* - Brief mentions of improved and unchanged areas
|
|
24
|
+
*/
|
|
25
|
+
export function formatRegressionAlert(report) {
|
|
26
|
+
const { comparison, provenance, summary } = report;
|
|
27
|
+
if (!comparison) {
|
|
28
|
+
return {
|
|
29
|
+
blocks: [
|
|
30
|
+
{
|
|
31
|
+
text: { text: "⚠️ No comparison data available", type: "mrkdwn" },
|
|
32
|
+
type: "section",
|
|
33
|
+
},
|
|
34
|
+
],
|
|
35
|
+
text: "No comparison data available",
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
const baselineScore = Math.round(comparison.baseline.overall.avgScore);
|
|
39
|
+
const experimentScore = Math.round(comparison.experiment.overall.avgScore);
|
|
40
|
+
const delta = Math.round(comparison.deltas.overall);
|
|
41
|
+
const blocks = [];
|
|
42
|
+
// Header — emoji + title + overall score change
|
|
43
|
+
blocks.push({
|
|
44
|
+
text: {
|
|
45
|
+
text: `📉 *AI Literacy Score Regression*\n` +
|
|
46
|
+
`Overall: ${baselineScore} → ${experimentScore} (${formatDelta(delta)})`,
|
|
47
|
+
type: "mrkdwn",
|
|
48
|
+
},
|
|
49
|
+
type: "section",
|
|
50
|
+
});
|
|
51
|
+
// Context — mode, source, timestamp, promptfoo link
|
|
52
|
+
const contextElements = [
|
|
53
|
+
{ text: `*Mode:* ${provenance.mode}`, type: "mrkdwn" },
|
|
54
|
+
{ text: `*Source:* ${provenance.source.name}`, type: "mrkdwn" },
|
|
55
|
+
{ text: `*Date:* ${readableDate(summary.timestamp)}`, type: "mrkdwn" },
|
|
56
|
+
];
|
|
57
|
+
if (provenance.promptfooUrl) {
|
|
58
|
+
contextElements.push({
|
|
59
|
+
text: `<${provenance.promptfooUrl}|View in Promptfoo>`,
|
|
60
|
+
type: "mrkdwn",
|
|
61
|
+
});
|
|
62
|
+
}
|
|
63
|
+
blocks.push({ elements: contextElements, type: "context" });
|
|
64
|
+
// Divider
|
|
65
|
+
blocks.push({ type: "divider" });
|
|
66
|
+
// Regressed areas — detailed fields with dimension breakdowns
|
|
67
|
+
const regressedAreas = comparison.areas.filter((a) => a.change === "regressed");
|
|
68
|
+
if (regressedAreas.length > 0) {
|
|
69
|
+
const fields = regressedAreas.map((area) => ({
|
|
70
|
+
text: `*${area.area}:* ${Math.round(area.baseline)} → ` +
|
|
71
|
+
`${Math.round(area.experiment)} (${formatDelta(Math.round(area.delta))})\n` +
|
|
72
|
+
dimensionBreakdown(area.dimensions),
|
|
73
|
+
type: "mrkdwn",
|
|
74
|
+
}));
|
|
75
|
+
blocks.push({
|
|
76
|
+
fields,
|
|
77
|
+
text: {
|
|
78
|
+
text: `*Regressed Areas (${regressedAreas.length})*`,
|
|
79
|
+
type: "mrkdwn",
|
|
80
|
+
},
|
|
81
|
+
type: "section",
|
|
82
|
+
});
|
|
83
|
+
}
|
|
84
|
+
// Improved areas — compact mention
|
|
85
|
+
if (comparison.improved.length > 0) {
|
|
86
|
+
blocks.push({
|
|
87
|
+
text: {
|
|
88
|
+
text: `📈 ${comparison.improved.length} area${comparison.improved.length === 1 ? "" : "s"} improved: ${comparison.improved.join(", ")}`,
|
|
89
|
+
type: "mrkdwn",
|
|
90
|
+
},
|
|
91
|
+
type: "section",
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
// Unchanged areas — brief mention
|
|
95
|
+
if (comparison.unchanged.length > 0) {
|
|
96
|
+
blocks.push({
|
|
97
|
+
text: {
|
|
98
|
+
text: `➡️ ${comparison.unchanged.length} area${comparison.unchanged.length === 1 ? "" : "s"} unchanged`,
|
|
99
|
+
type: "mrkdwn",
|
|
100
|
+
},
|
|
101
|
+
type: "section",
|
|
102
|
+
});
|
|
103
|
+
}
|
|
104
|
+
return {
|
|
105
|
+
blocks,
|
|
106
|
+
text: `📉 AI Literacy Score Regression: ${baselineScore} → ${experimentScore} (${formatDelta(delta)})`,
|
|
107
|
+
};
|
|
108
|
+
}
|
|
109
|
+
/**
 * Format a general score summary for Slack reporting.
 *
 * Produces a compact overview with:
 * - Overall score with grade emoji
 * - Context line (mode, source, date)
 * - Per-area score table (omitted when the report has no area scores)
 * - Cost summary (if available)
 * - Promptfoo link (if available)
 *
 * @param report - Report whose `provenance` and `summary` are rendered
 * @returns Object with the Block Kit `blocks` array and a one-line `text` summary
 */
export function formatScoreSummary(report) {
    const { provenance, summary } = report;
    const overall = Math.round(summary.overall.avgScore);
    const blocks = [];
    // Header — overall score with emoji
    blocks.push({
        text: {
            text: `${gradeEmoji(overall)} *AI Literacy Score: ${overall}*`,
            type: "mrkdwn",
        },
        type: "section",
    });
    // Context — mode, source, date
    const contextElements = [
        { text: `*Mode:* ${provenance.mode}`, type: "mrkdwn" },
        { text: `*Source:* ${provenance.source.name}`, type: "mrkdwn" },
        { text: `*Date:* ${readableDate(summary.timestamp)}`, type: "mrkdwn" },
    ];
    blocks.push({ elements: contextElements, type: "context" });
    // Divider
    blocks.push({ type: "divider" });
    // Per-area score table as markdown. Skipped when there are no scores:
    // a section block with empty text is rejected by Slack and would fail
    // the whole message.
    if (summary.scores.length > 0) {
        const rows = summary.scores
            .map((s) => {
                // Grade the rounded total so the emoji always agrees with the
                // number shown next to it (e.g. 79.6 is displayed as 80).
                const total = Math.round(s.totalScore);
                // T / C / D = task completion, code correctness, doc coverage
                return `${gradeEmoji(total)} *${s.feature}*: ${total} _(T:${Math.round(s.taskCompletion)} · C:${Math.round(s.codeCorrectness)} · D:${Math.round(s.docCoverage)})_`;
            })
            .join("\n");
        blocks.push({
            text: { text: rows, type: "mrkdwn" },
            type: "section",
        });
    }
    // Cost summary
    if (summary.overall.cost) {
        blocks.push({
            text: {
                text: `💰 Total cost: $${summary.overall.cost.total.toFixed(2)} ($${summary.overall.cost.perTest.toFixed(3)}/test)`,
                type: "mrkdwn",
            },
            type: "section",
        });
    }
    // Promptfoo link
    if (provenance.promptfooUrl) {
        blocks.push({
            text: {
                text: `🔗 <${provenance.promptfooUrl}|View full results in Promptfoo>`,
                type: "mrkdwn",
            },
            type: "section",
        });
    }
    return {
        blocks,
        text: `${gradeEmoji(overall)} AI Literacy Score: ${overall}`,
    };
}
|
|
175
|
+
/**
 * Format a weekly digest summary for Slack.
 *
 * Produces a summary message covering score trends over a time window:
 * - Header with overall trend direction and score
 * - Report count and time window metadata
 * - Per-area trend table with arrows (omitted when there are no area trends)
 * - Lists of improved, regressed, and stable areas
 * - Total evaluation cost (if available)
 *
 * @param digest - Digest summary to render
 * @returns Object with the Block Kit `blocks` array and a one-line `text` summary
 */
export function formatWeeklyDigest(digest) {
    const trendEmoji = digest.overallTrend === "improving"
        ? "📈"
        : digest.overallTrend === "regressing"
            ? "📉"
            : "➡️";
    const blocks = [];
    // Header — overall trend
    blocks.push({
        text: {
            text: `${trendEmoji} *Weekly AI Literacy Digest*\n` +
                `Overall: ${Math.round(digest.overallLatest)} (${formatDelta(Math.round(digest.overallDelta))} this week)`,
            type: "mrkdwn",
        },
        type: "section",
    });
    // Context — time window and report count
    blocks.push({
        elements: [
            {
                text: `*Period:* ${readableDate(digest.lookbackStart)} – ${readableDate(digest.lookbackEnd)}`,
                type: "mrkdwn",
            },
            {
                text: `*Reports:* ${digest.reportCount}`,
                type: "mrkdwn",
            },
        ],
        type: "context",
    });
    blocks.push({ type: "divider" });
    // Per-area trend table
    if (digest.areaTrends.length > 0) {
        const rows = digest.areaTrends
            .map((t) => {
                const arrow = t.trend === "improving" ? "↑" : t.trend === "regressing" ? "↓" : "→";
                // Grade the rounded score so the emoji always agrees with the
                // number shown next to it (e.g. 79.6 is displayed as 80).
                const score = Math.round(t.lastScore);
                return `${gradeEmoji(score)} *${t.area}*: ${score} ${arrow} (${formatDelta(Math.round(t.scoreDelta))})`;
            })
            .join("\n");
        blocks.push({
            text: { text: rows, type: "mrkdwn" },
            type: "section",
        });
    }
    // Summary badges
    if (digest.improved.length > 0) {
        blocks.push({
            text: {
                text: `📈 *Improved:* ${digest.improved.join(", ")}`,
                type: "mrkdwn",
            },
            type: "section",
        });
    }
    if (digest.regressed.length > 0) {
        blocks.push({
            text: {
                text: `📉 *Regressed:* ${digest.regressed.join(", ")}`,
                type: "mrkdwn",
            },
            type: "section",
        });
    }
    if (digest.stable.length > 0) {
        blocks.push({
            text: {
                text: `➡️ *Stable:* ${digest.stable.join(", ")}`,
                type: "mrkdwn",
            },
            type: "section",
        });
    }
    // Cost summary if available. `!= null` also skips an explicit null, which
    // would otherwise throw on `.toFixed`. The amount carries a `$` prefix to
    // match the cost line produced by formatScoreSummary.
    if (digest.totalCost != null) {
        blocks.push({
            text: {
                text: `💰 Total evaluation cost this week: $${digest.totalCost.toFixed(2)}`,
                type: "mrkdwn",
            },
            type: "section",
        });
    }
    return {
        blocks,
        text: `${trendEmoji} Weekly AI Literacy Digest: ${Math.round(digest.overallLatest)} (${formatDelta(Math.round(digest.overallDelta))})`,
    };
}
|
|
272
|
+
/**
 * Build a dimension breakdown string for an area delta.
 *
 * Renders the task-completion, code-correctness and doc-coverage deltas as
 * `Task: … · Code: … · Docs: …`. Each delta is rounded to a whole number
 * before formatting, matching how every other delta in this module is
 * displayed, so fractional differences do not leak long decimals into Slack.
 *
 * @param dimensions - Object exposing `taskCompletion.delta`,
 *   `codeCorrectness.delta` and `docCoverage.delta`
 * @returns The three signed deltas joined with " · "
 */
function dimensionBreakdown(dimensions) {
    return [
        `Task: ${formatDelta(Math.round(dimensions.taskCompletion.delta))}`,
        `Code: ${formatDelta(Math.round(dimensions.codeCorrectness.delta))}`,
        `Docs: ${formatDelta(Math.round(dimensions.docCoverage.delta))}`,
    ].join(" · ");
}
|
|
280
|
+
/**
 * Render a numeric delta with an explicit sign.
 *
 * Positive values get a leading "+", negative values keep their own "-",
 * and anything that is neither above nor below zero renders as "0".
 *
 * @param n - Delta to format
 * @returns Signed string such as "+4", "-2", or "0"
 */
function formatDelta(n) {
    const isPositive = n > 0;
    const isNegative = n < 0;
    if (!isPositive && !isNegative) {
        return "0";
    }
    return isPositive ? `+${n}` : `${n}`;
}
|
|
288
|
+
/**
 * Map a score to its tier emoji.
 *
 * Tiers, checked from highest to lowest: ✅ (≥80), 🟡 (≥70), 🟠 (≥50);
 * everything else is 🔴.
 *
 * @param score - Score to grade
 * @returns The emoji of the first tier whose threshold the score reaches
 */
function gradeEmoji(score) {
    const tiers = [
        [80, "✅"],
        [70, "🟡"],
        [50, "🟠"],
    ];
    for (const [threshold, emoji] of tiers) {
        if (score >= threshold) {
            return emoji;
        }
    }
    return "🔴";
}
|
|
298
|
+
/**
 * Turn a timestamp into a short, human-readable en-US date.
 *
 * @param iso - Value accepted by the `Date` constructor (an ISO timestamp string)
 * @returns Date rendered with short month, numeric day and numeric year
 */
function readableDate(iso) {
    return new Date(iso).toLocaleDateString("en-US", {
        year: "numeric",
        month: "short",
        day: "numeric",
    });
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sinks/index.ts
|
|
3
|
+
*
|
|
4
|
+
* Barrel exports for the report sink module.
|
|
5
|
+
*
|
|
6
|
+
* Framework types and utilities live at this level. Concrete sink
|
|
7
|
+
* implementations live in subdirectories (bigquery/, slack/, webhook/).
|
|
8
|
+
*
|
|
9
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
10
|
+
*/
|
|
11
|
+
export { BigQuerySink } from "./bigquery/index.js";
|
|
12
|
+
export type { AreaScoreRow, BigQuerySinkOptions, ReportRow, } from "./bigquery/index.js";
|
|
13
|
+
export { flattenAreaScoreRows, flattenReportRow } from "./bigquery/index.js";
|
|
14
|
+
export { loadSinks } from "./loader.js";
|
|
15
|
+
export { withRetry } from "./retry.js";
|
|
16
|
+
export type { SinkRouting } from "./schema.js";
|
|
17
|
+
export { formatRegressionAlert, formatScoreSummary, formatWeeklyDigest, } from "./slack/format.js";
|
|
18
|
+
export type { SlackMessage } from "./slack/format.js";
|
|
19
|
+
export { SlackSink } from "./slack/index.js";
|
|
20
|
+
export type { SlackSinkOptions } from "./slack/index.js";
|
|
21
|
+
export type { ReportSink } from "./types.js";
|
|
22
|
+
export { shouldDeliver } from "./types.js";
|
|
23
|
+
export { WebhookSink } from "./webhook/index.js";
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sinks/index.ts
|
|
3
|
+
*
|
|
4
|
+
* Barrel exports for the report sink module.
|
|
5
|
+
*
|
|
6
|
+
* Framework types and utilities live at this level. Concrete sink
|
|
7
|
+
* implementations live in subdirectories (bigquery/, slack/, webhook/).
|
|
8
|
+
*
|
|
9
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
10
|
+
*/
|
|
11
|
+
export { BigQuerySink } from "./bigquery/index.js";
|
|
12
|
+
export { flattenAreaScoreRows, flattenReportRow } from "./bigquery/index.js";
|
|
13
|
+
export { loadSinks } from "./loader.js";
|
|
14
|
+
export { withRetry } from "./retry.js";
|
|
15
|
+
export { formatRegressionAlert, formatScoreSummary, formatWeeklyDigest, } from "./slack/format.js";
|
|
16
|
+
export { SlackSink } from "./slack/index.js";
|
|
17
|
+
export { shouldDeliver } from "./types.js";
|
|
18
|
+
export { WebhookSink } from "./webhook/index.js";
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sink loader — reads config/sinks.yaml, resolves env vars, validates,
|
|
3
|
+
* and instantiates sink objects.
|
|
4
|
+
*
|
|
5
|
+
* Fire-and-forget: parse failures log a warning and return an empty array
|
|
6
|
+
* so the pipeline never crashes due to sink misconfiguration.
|
|
7
|
+
*
|
|
8
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
9
|
+
*/
|
|
10
|
+
import type { ReportSink } from "./types.js";
|
|
11
|
+
/**
 * Build the sink instances described by `config/sinks.yaml`.
 *
 * The result is an empty array when:
 * - the config file does not exist (no sinks configured)
 * - the config file cannot be parsed or fails validation
 *
 * @returns The instantiated sinks
 */
export declare function loadSinks(): Array<ReportSink>;
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sink loader — reads config/sinks.yaml, resolves env vars, validates,
|
|
3
|
+
* and instantiates sink objects.
|
|
4
|
+
*
|
|
5
|
+
* Fire-and-forget: parse failures log a warning and return an empty array
|
|
6
|
+
* so the pipeline never crashes due to sink misconfiguration.
|
|
7
|
+
*
|
|
8
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
9
|
+
*/
|
|
10
|
+
import { existsSync, readFileSync } from "fs";
|
|
11
|
+
import { dirname, resolve } from "path";
|
|
12
|
+
import { fileURLToPath } from "url";
|
|
13
|
+
import { load } from "js-yaml";
|
|
14
|
+
import { interpolate } from "../interpolate.js";
|
|
15
|
+
import { BigQuerySink } from "./bigquery/index.js";
|
|
16
|
+
import { SinksFileSchema } from "./schema.js";
|
|
17
|
+
import { SlackSink } from "./slack/index.js";
|
|
18
|
+
import { WebhookSink } from "./webhook/index.js";
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Paths
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// Directory containing this module.
const __dirname = dirname(fileURLToPath(import.meta.url));
// `config/sinks.yaml`, located two directories above this module's directory.
const SINKS_CONFIG_PATH = resolve(__dirname, "..", "..", "config", "sinks.yaml");
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/**
 * Read `config/sinks.yaml` and instantiate every enabled sink.
 *
 * The file is parsed as YAML, passed through `interpolate`, validated with
 * `SinksFileSchema`, and each enabled entry is handed to `createSink`.
 * Entries for which `createSink` returns `null` are dropped.
 *
 * An empty array is returned when:
 * - the config file does not exist (no sinks configured)
 * - anything in the read / parse / validate / instantiate chain throws;
 *   the failure is logged as a warning instead of being propagated
 *
 * @returns The instantiated sinks
 */
export function loadSinks() {
    if (!existsSync(SINKS_CONFIG_PATH)) {
        return [];
    }
    try {
        const resolved = interpolate(load(readFileSync(SINKS_CONFIG_PATH, "utf-8")));
        // When all sinks are commented out, js-yaml returns { sinks: null }.
        // Top-level nulls are mapped to undefined so Zod's .default([]) can apply.
        let candidate = resolved;
        if (resolved && typeof resolved === "object") {
            const pairs = Object.entries(resolved).map(([key, value]) => [
                key,
                value === null ? undefined : value,
            ]);
            candidate = Object.fromEntries(pairs);
        }
        const { sinks } = SinksFileSchema.parse(candidate);
        const instances = [];
        for (const entry of sinks) {
            if (!entry.enabled) {
                continue;
            }
            const sink = createSink(entry);
            if (sink !== null) {
                instances.push(sink);
            }
        }
        return instances;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : err;
        console.warn("Failed to load sinks config — continuing without sinks:", reason);
        return [];
    }
}
|
|
58
|
+
// ---------------------------------------------------------------------------
|
|
59
|
+
// Factory
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
/**
 * Instantiate the sink matching a validated config entry.
 *
 * @param config - Sink config entry; `config.type` selects the implementation
 * @returns The sink instance, or `null` for "github-comment" (not yet
 *   implemented) and for unrecognised types, both of which are logged
 */
function createSink(config) {
    const kind = config.type;
    if (kind === "bigquery") {
        const { credentials, dataset, project } = config;
        return new BigQuerySink({ credentials, dataset, project });
    }
    if (kind === "github-comment") {
        console.log(" ℹ️ GitHubCommentSink not yet implemented — skipping");
        return null;
    }
    if (kind === "slack") {
        return new SlackSink(config.webhookUrl, config.channel);
    }
    if (kind === "webhook") {
        return new WebhookSink(config.url, config.headers ?? {});
    }
    // Fallback for a type none of the branches above recognise.
    console.warn(` ⚠️ Unknown sink type: ${config.type}`);
    return null;
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sinks/retry.ts
|
|
3
|
+
*
|
|
4
|
+
* Simple retry utility with exponential backoff for sink operations.
|
|
5
|
+
* Used by the sink runner to retry transient failures (network timeouts,
|
|
6
|
+
* HTTP 429/5xx) without requiring sinks to implement retry logic internally.
|
|
7
|
+
*
|
|
8
|
+
* Policy: 3 retries, exponential backoff (1s → 2s → 4s).
|
|
9
|
+
*
|
|
10
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
11
|
+
*/
|
|
12
|
+
import type { SinkResult } from "../pipeline/types.js";
|
|
13
|
+
/**
 * Run a sink publish operation, retrying with exponential backoff.
 *
 * A result whose status is not "failed" ("success" or "skipped") is returned
 * immediately. A "failed" result or a thrown exception (converted to a
 * "failed" result) triggers another attempt until the retries are used up.
 *
 * @param fn - The publish function to retry
 * @param maxRetries - Maximum number of retry attempts (default: 3)
 * @param baseDelayMs - Initial delay in milliseconds (default: 1000)
 * @returns The final SinkResult after all attempts
 */
export declare function withRetry(
    fn: () => Promise<SinkResult>,
    maxRetries?: number,
    baseDelayMs?: number,
): Promise<SinkResult>;
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sinks/retry.ts
|
|
3
|
+
*
|
|
4
|
+
* Simple retry utility with exponential backoff for sink operations.
|
|
5
|
+
* Used by the sink runner to retry transient failures (network timeouts,
|
|
6
|
+
* HTTP 429/5xx) without requiring sinks to implement retry logic internally.
|
|
7
|
+
*
|
|
8
|
+
* Policy: 3 retries, exponential backoff (1s → 2s → 4s).
|
|
9
|
+
*
|
|
10
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
11
|
+
*/
|
|
12
|
+
// Default policy: 3 retries with delays of 1s → 2s → 4s.
const DEFAULT_MAX_RETRIES = 3;
const DEFAULT_BASE_DELAY_MS = 1000;
/**
 * Run a sink publish operation, retrying with exponential backoff.
 *
 * The operation is attempted once plus up to `maxRetries` more times. Any
 * result whose status is not "failed" is returned straight away. A "failed"
 * result, or a thrown exception (captured as a "failed" result carrying the
 * error message), leads to another attempt after waiting
 * `baseDelayMs * 2^attempt` milliseconds.
 *
 * @param fn - The publish function to retry
 * @param maxRetries - Maximum number of retry attempts (default: 3)
 * @param baseDelayMs - Initial delay in milliseconds (default: 1000)
 * @returns The first non-"failed" result, otherwise the last "failed" one
 */
export async function withRetry(fn, maxRetries = DEFAULT_MAX_RETRIES, baseDelayMs = DEFAULT_BASE_DELAY_MS) {
    // Returned unchanged only if the loop body never runs.
    let outcome = { error: "No attempts made", status: "failed" };
    let attempt = 0;
    while (attempt <= maxRetries) {
        try {
            const result = await fn();
            // Success and skipped are terminal — no retry
            if (result.status !== "failed") {
                return result;
            }
            outcome = result;
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            outcome = { error: message, status: "failed" };
        }
        // Back off before the next attempt; no wait after the final one
        if (attempt < maxRetries) {
            await sleep(baseDelayMs * 2 ** attempt);
        }
        attempt += 1;
    }
    return outcome;
}
/** Resolve after `ms` milliseconds. */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
 * sinks/schema.ts — Re-export barrel
 *
 * All sink Zod schemas now live in @sanity/ailf-core. This file
 * re-exports them for backward compatibility.
 *
 * The specifier targets the runtime module (`index.js`), matching the
 * compiled `schema.js` barrel; TypeScript resolves the adjacent `index.d.ts`
 * from it. A module specifier ending in `.d.ts` is rejected by the compiler.
 *
 * @see packages/core/src/schemas/sinks.ts (canonical source)
 */
export * from "../_vendor/ailf-core/index.js";
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sinks/schema.ts — Re-export barrel
|
|
3
|
+
*
|
|
4
|
+
* All sink Zod schemas now live in @sanity/ailf-core. This file
|
|
5
|
+
* re-exports them for backward compatibility.
|
|
6
|
+
*
|
|
7
|
+
* @see packages/core/src/schemas/sinks.ts (canonical source)
|
|
8
|
+
*/
|
|
9
|
+
export * from "../_vendor/ailf-core/index.js";
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sinks/slack/format.ts
|
|
3
|
+
*
|
|
4
|
+
* Formats evaluation report data into Slack Block Kit structures for the
|
|
5
|
+
* SlackSink. Provides three message formats:
|
|
6
|
+
*
|
|
7
|
+
* - `formatRegressionAlert` — detailed regression notification with
|
|
8
|
+
* per-area dimension breakdowns
|
|
9
|
+
* - `formatScoreSummary` — compact score overview for general reporting
|
|
10
|
+
* - `formatWeeklyDigest` — weekly trend summary for scheduled digests
|
|
11
|
+
*
|
|
12
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
13
|
+
*/
|
|
14
|
+
import type { Report } from "../../pipeline/types.js";
|
|
15
|
+
import type { DigestSummary } from "../../schedules/digest.js";
|
|
16
|
+
/** Text object used inside blocks: the content plus how it is formatted. */
interface SlackTextObject {
    type: "mrkdwn" | "plain_text";
    text: string;
}
/** Message produced by the formatters in this module. */
export interface SlackMessage {
    /** One-line textual summary of the message. */
    text: string;
    /** Block Kit blocks making up the message body. */
    blocks: SlackBlock[];
}
/** A single block; which optional members are set depends on `type`. */
interface SlackBlock {
    type: "context" | "divider" | "header" | "section";
    /** Main text of the block. */
    text?: SlackTextObject;
    /** Field entries of the block. */
    fields?: SlackTextObject[];
    /** Element entries of the block. */
    elements?: SlackTextObject[];
}
|
|
35
|
+
/**
 * Build the Slack regression alert for a report.
 *
 * The message contains:
 * - a header showing the overall score change
 * - context metadata (mode, source, date, Promptfoo link when present)
 * - one entry per regressed area with its dimension breakdown
 * - short mentions of improved and unchanged areas
 *
 * @param report - Report to render
 * @returns The formatted Slack message
 */
export declare function formatRegressionAlert(
    report: Report,
): SlackMessage;
|
|
45
|
+
/**
 * Build the general score summary message for Slack.
 *
 * The message contains:
 * - the overall score with its grade emoji
 * - a per-area score table
 * - a cost summary (if available)
 * - a Promptfoo link (if available)
 *
 * @param report - Report to render
 * @returns The formatted Slack message
 */
export declare function formatScoreSummary(
    report: Report,
): SlackMessage;
|
|
55
|
+
/**
 * Build the weekly digest message for Slack.
 *
 * The message summarises score trends over a time window:
 * - a header with the overall trend direction and score
 * - report count and time window metadata
 * - a per-area trend table with arrows
 * - lists of improved, regressed, and stable areas
 *
 * @param digest - Digest summary to render
 * @returns The formatted Slack message
 */
export declare function formatWeeklyDigest(
    digest: DigestSummary,
): SlackMessage;
|
|
65
|
+
export {};
|