@sanity/ailf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -0
- package/bin/ailf.js +64 -0
- package/canonical/grader-references/README.md +88 -0
- package/canonical/grader-references/groq.yaml +234 -0
- package/canonical/grader-references/studio-setup.yaml +275 -0
- package/canonical/reference-solutions/.gitkeep +1 -0
- package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
- package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
- package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
- package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
- package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
- package/canonical/reference-solutions/groq/joins-references.ts +300 -0
- package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
- package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
- package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
- package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
- package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
- package/config/bigquery/README.md +74 -0
- package/config/bigquery/views/area_scores.sql +87 -0
- package/config/bigquery/views/reports.sql +49 -0
- package/config/features.yaml +116 -0
- package/config/models.yaml +115 -0
- package/config/prompts.yaml +75 -0
- package/config/rubrics.yaml +62 -0
- package/config/schedules.yaml +43 -0
- package/config/sinks.yaml +54 -0
- package/config/sources.yaml +51 -0
- package/config/thresholds.yaml +49 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
- package/dist/_vendor/ailf-core/examples/index.js +285 -0
- package/dist/_vendor/ailf-core/index.d.ts +17 -0
- package/dist/_vendor/ailf-core/index.js +17 -0
- package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
- package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
- package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
- package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
- package/dist/_vendor/ailf-core/ports/context.js +14 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
- package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
- package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
- package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
- package/dist/_vendor/ailf-core/ports/index.js +7 -0
- package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
- package/dist/_vendor/ailf-core/ports/logger.js +11 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
- package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
- package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
- package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
- package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
- package/dist/_vendor/ailf-core/schemas/index.js +16 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
- package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
- package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
- package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
- package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
- package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
- package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
- package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
- package/dist/_vendor/ailf-core/services/index.js +12 -0
- package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
- package/dist/_vendor/ailf-core/services/scoring.js +222 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
- package/dist/_vendor/ailf-core/types/index.js +21 -0
- package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
- package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
- package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
- package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
- package/dist/_vendor/ailf-shared/document-ref.js +1 -0
- package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
- package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
- package/dist/_vendor/ailf-shared/index.d.ts +16 -0
- package/dist/_vendor/ailf-shared/index.js +16 -0
- package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
- package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
- package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
- package/dist/_vendor/ailf-shared/score-grades.js +23 -0
- package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
- package/dist/adapters/cache/content-lake-cache.js +59 -0
- package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
- package/dist/adapters/cache/filesystem-cache.js +54 -0
- package/dist/adapters/cache/index.d.ts +2 -0
- package/dist/adapters/cache/index.js +2 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
- package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
- package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
- package/dist/adapters/config-sources/file-config-adapter.js +96 -0
- package/dist/adapters/config-sources/index.d.ts +2 -0
- package/dist/adapters/config-sources/index.js +2 -0
- package/dist/adapters/doc-fetchers/index.d.ts +1 -0
- package/dist/adapters/doc-fetchers/index.js +1 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
- package/dist/adapters/eval-runners/index.d.ts +1 -0
- package/dist/adapters/eval-runners/index.js +1 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
- package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +12 -0
- package/dist/adapters/loggers/console-logger.d.ts +22 -0
- package/dist/adapters/loggers/console-logger.js +54 -0
- package/dist/adapters/loggers/index.d.ts +9 -0
- package/dist/adapters/loggers/index.js +9 -0
- package/dist/adapters/loggers/json-logger.d.ts +18 -0
- package/dist/adapters/loggers/json-logger.js +33 -0
- package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
- package/dist/adapters/loggers/quiet-logger.js +30 -0
- package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/composite-task-source.js +59 -0
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
- package/dist/adapters/task-sources/index.d.ts +7 -0
- package/dist/adapters/task-sources/index.js +7 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
- package/dist/adapters/task-sources/repo-schemas.js +234 -0
- package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
- package/dist/adapters/task-sources/repo-task-source.js +104 -0
- package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
- package/dist/adapters/task-sources/repo-trigger.js +153 -0
- package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
- package/dist/adapters/task-sources/repo-validation.js +164 -0
- package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
- package/dist/adapters/task-sources/yaml-task-source.js +136 -0
- package/dist/agent-observer/agentic-provider.d.ts +132 -0
- package/dist/agent-observer/agentic-provider.js +983 -0
- package/dist/agent-observer/classifier.d.ts +62 -0
- package/dist/agent-observer/classifier.js +269 -0
- package/dist/agent-observer/index.d.ts +7 -0
- package/dist/agent-observer/index.js +4 -0
- package/dist/agent-observer/pricing.d.ts +35 -0
- package/dist/agent-observer/pricing.js +82 -0
- package/dist/agent-observer/provider.d.ts +77 -0
- package/dist/agent-observer/provider.js +151 -0
- package/dist/agent-observer/proxy.d.ts +91 -0
- package/dist/agent-observer/proxy.js +321 -0
- package/dist/agent-observer/test-imports.d.ts +7 -0
- package/dist/agent-observer/test-imports.js +185 -0
- package/dist/agent-observer/types.d.ts +137 -0
- package/dist/agent-observer/types.js +16 -0
- package/dist/assertions/source-isolation.d.ts +72 -0
- package/dist/assertions/source-isolation.js +117 -0
- package/dist/cli.d.ts +24 -0
- package/dist/cli.js +199 -0
- package/dist/commands/agent-report.d.ts +5 -0
- package/dist/commands/agent-report.js +69 -0
- package/dist/commands/baseline.d.ts +9 -0
- package/dist/commands/baseline.js +141 -0
- package/dist/commands/cache.d.ts +13 -0
- package/dist/commands/cache.js +135 -0
- package/dist/commands/calculate-scores.d.ts +8 -0
- package/dist/commands/calculate-scores.js +48 -0
- package/dist/commands/compare.d.ts +8 -0
- package/dist/commands/compare.js +120 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.js +260 -0
- package/dist/commands/coverage-audit.d.ts +7 -0
- package/dist/commands/coverage-audit.js +40 -0
- package/dist/commands/discovery-report.d.ts +10 -0
- package/dist/commands/discovery-report.js +44 -0
- package/dist/commands/eval.d.ts +9 -0
- package/dist/commands/eval.js +35 -0
- package/dist/commands/explain-handler.d.ts +34 -0
- package/dist/commands/explain-handler.js +719 -0
- package/dist/commands/fetch-docs.d.ts +8 -0
- package/dist/commands/fetch-docs.js +128 -0
- package/dist/commands/generate-configs.d.ts +8 -0
- package/dist/commands/generate-configs.js +46 -0
- package/dist/commands/grader/index.d.ts +11 -0
- package/dist/commands/grader/index.js +118 -0
- package/dist/commands/init.d.ts +19 -0
- package/dist/commands/init.js +150 -0
- package/dist/commands/interactive.d.ts +12 -0
- package/dist/commands/interactive.js +238 -0
- package/dist/commands/lookup-doc.d.ts +15 -0
- package/dist/commands/lookup-doc.js +84 -0
- package/dist/commands/measure-retrieval.d.ts +5 -0
- package/dist/commands/measure-retrieval.js +65 -0
- package/dist/commands/pipeline-action.d.ts +71 -0
- package/dist/commands/pipeline-action.js +305 -0
- package/dist/commands/pipeline.d.ts +62 -0
- package/dist/commands/pipeline.js +53 -0
- package/dist/commands/pr-comment.d.ts +8 -0
- package/dist/commands/pr-comment.js +47 -0
- package/dist/commands/publish.d.ts +26 -0
- package/dist/commands/publish.js +253 -0
- package/dist/commands/readiness-report.d.ts +10 -0
- package/dist/commands/readiness-report.js +104 -0
- package/dist/commands/shared/options.d.ts +29 -0
- package/dist/commands/shared/options.js +57 -0
- package/dist/commands/update-quality-scores.d.ts +5 -0
- package/dist/commands/update-quality-scores.js +20 -0
- package/dist/commands/validate-tasks.d.ts +16 -0
- package/dist/commands/validate-tasks.js +93 -0
- package/dist/commands/validate.d.ts +9 -0
- package/dist/commands/validate.js +73 -0
- package/dist/commands/webhook-server.d.ts +5 -0
- package/dist/commands/webhook-server.js +30 -0
- package/dist/commands/weekly-digest.d.ts +10 -0
- package/dist/commands/weekly-digest.js +104 -0
- package/dist/composition-root.d.ts +26 -0
- package/dist/composition-root.js +107 -0
- package/dist/interpolate.d.ts +26 -0
- package/dist/interpolate.js +70 -0
- package/dist/job-store.d.ts +104 -0
- package/dist/job-store.js +188 -0
- package/dist/lib/agent-behavior-report.d.ts +8 -0
- package/dist/lib/agent-behavior-report.js +185 -0
- package/dist/lib/baseline.d.ts +19 -0
- package/dist/lib/baseline.js +153 -0
- package/dist/lib/calculate-scores.d.ts +23 -0
- package/dist/lib/calculate-scores.js +42 -0
- package/dist/lib/compare.d.ts +18 -0
- package/dist/lib/compare.js +170 -0
- package/dist/lib/coverage-audit.d.ts +4 -0
- package/dist/lib/coverage-audit.js +42 -0
- package/dist/lib/discovery-report.d.ts +13 -0
- package/dist/lib/discovery-report.js +57 -0
- package/dist/lib/fetch-docs.d.ts +30 -0
- package/dist/lib/fetch-docs.js +171 -0
- package/dist/lib/generate-configs.d.ts +25 -0
- package/dist/lib/generate-configs.js +42 -0
- package/dist/lib/grader-api.d.ts +21 -0
- package/dist/lib/grader-api.js +34 -0
- package/dist/lib/grader-compare.d.ts +19 -0
- package/dist/lib/grader-compare.js +91 -0
- package/dist/lib/grader-consistency.d.ts +27 -0
- package/dist/lib/grader-consistency.js +79 -0
- package/dist/lib/grader-sensitivity.d.ts +19 -0
- package/dist/lib/grader-sensitivity.js +75 -0
- package/dist/lib/grader-validate.d.ts +19 -0
- package/dist/lib/grader-validate.js +78 -0
- package/dist/lib/measure-retrieval.d.ts +14 -0
- package/dist/lib/measure-retrieval.js +71 -0
- package/dist/lib/pr-comment.d.ts +16 -0
- package/dist/lib/pr-comment.js +28 -0
- package/dist/lib/readiness-report.d.ts +13 -0
- package/dist/lib/readiness-report.js +108 -0
- package/dist/lib/webhook-server.d.ts +11 -0
- package/dist/lib/webhook-server.js +24 -0
- package/dist/lib/weekly-digest.d.ts +24 -0
- package/dist/lib/weekly-digest.js +148 -0
- package/dist/orchestration/build-app-context.d.ts +27 -0
- package/dist/orchestration/build-app-context.js +81 -0
- package/dist/orchestration/build-step-sequence.d.ts +15 -0
- package/dist/orchestration/build-step-sequence.js +84 -0
- package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
- package/dist/orchestration/config-to-source-overrides.js +28 -0
- package/dist/orchestration/env-bridge.d.ts +21 -0
- package/dist/orchestration/env-bridge.js +66 -0
- package/dist/orchestration/index.d.ts +11 -0
- package/dist/orchestration/index.js +11 -0
- package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
- package/dist/orchestration/pipeline-orchestrator.js +153 -0
- package/dist/orchestration/step-runner.d.ts +20 -0
- package/dist/orchestration/step-runner.js +88 -0
- package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
- package/dist/orchestration/steps/calculate-scores-step.js +95 -0
- package/dist/orchestration/steps/callback-step.d.ts +24 -0
- package/dist/orchestration/steps/callback-step.js +76 -0
- package/dist/orchestration/steps/compare-step.d.ts +14 -0
- package/dist/orchestration/steps/compare-step.js +92 -0
- package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
- package/dist/orchestration/steps/discovery-report-step.js +55 -0
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
- package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
- package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
- package/dist/orchestration/steps/fetch-docs-step.js +135 -0
- package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
- package/dist/orchestration/steps/gap-analysis-step.js +136 -0
- package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
- package/dist/orchestration/steps/generate-configs-step.js +85 -0
- package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
- package/dist/orchestration/steps/grader-consistency-step.js +64 -0
- package/dist/orchestration/steps/index.d.ts +19 -0
- package/dist/orchestration/steps/index.js +19 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
- package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
- package/dist/orchestration/steps/publish-report-step.js +216 -0
- package/dist/orchestration/steps/readiness-step.d.ts +13 -0
- package/dist/orchestration/steps/readiness-step.js +91 -0
- package/dist/orchestration/steps/report-step.d.ts +12 -0
- package/dist/orchestration/steps/report-step.js +49 -0
- package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
- package/dist/orchestration/steps/run-eval-step.js +195 -0
- package/dist/orchestration/steps/validate-step.d.ts +12 -0
- package/dist/orchestration/steps/validate-step.js +41 -0
- package/dist/pipeline/agent-behavior-report.d.ts +53 -0
- package/dist/pipeline/agent-behavior-report.js +132 -0
- package/dist/pipeline/attribution.d.ts +47 -0
- package/dist/pipeline/attribution.js +226 -0
- package/dist/pipeline/baseline.d.ts +37 -0
- package/dist/pipeline/baseline.js +141 -0
- package/dist/pipeline/cache.d.ts +101 -0
- package/dist/pipeline/cache.js +283 -0
- package/dist/pipeline/calculate-scores.d.ts +102 -0
- package/dist/pipeline/calculate-scores.js +1128 -0
- package/dist/pipeline/callback-delivery.d.ts +50 -0
- package/dist/pipeline/callback-delivery.js +89 -0
- package/dist/pipeline/checks.d.ts +39 -0
- package/dist/pipeline/checks.js +280 -0
- package/dist/pipeline/classify-url.d.ts +61 -0
- package/dist/pipeline/classify-url.js +93 -0
- package/dist/pipeline/compare.d.ts +31 -0
- package/dist/pipeline/compare.js +208 -0
- package/dist/pipeline/coverage-audit.d.ts +39 -0
- package/dist/pipeline/coverage-audit.js +165 -0
- package/dist/pipeline/degradations.d.ts +85 -0
- package/dist/pipeline/degradations.js +242 -0
- package/dist/pipeline/discovery-report.d.ts +55 -0
- package/dist/pipeline/discovery-report.js +178 -0
- package/dist/pipeline/eval-constants.d.ts +68 -0
- package/dist/pipeline/eval-constants.js +111 -0
- package/dist/pipeline/eval-fingerprint.d.ts +66 -0
- package/dist/pipeline/eval-fingerprint.js +175 -0
- package/dist/pipeline/expand-tasks.d.ts +220 -0
- package/dist/pipeline/expand-tasks.js +421 -0
- package/dist/pipeline/failure-modes.d.ts +46 -0
- package/dist/pipeline/failure-modes.js +348 -0
- package/dist/pipeline/fetch-url-content.d.ts +44 -0
- package/dist/pipeline/fetch-url-content.js +93 -0
- package/dist/pipeline/gap-analysis.d.ts +48 -0
- package/dist/pipeline/gap-analysis.js +231 -0
- package/dist/pipeline/generate-configs.d.ts +72 -0
- package/dist/pipeline/generate-configs.js +395 -0
- package/dist/pipeline/grader-api.d.ts +49 -0
- package/dist/pipeline/grader-api.js +200 -0
- package/dist/pipeline/grader-compare-runner.d.ts +44 -0
- package/dist/pipeline/grader-compare-runner.js +301 -0
- package/dist/pipeline/grader-comparison.d.ts +111 -0
- package/dist/pipeline/grader-comparison.js +161 -0
- package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
- package/dist/pipeline/grader-consistency-runner.js +270 -0
- package/dist/pipeline/grader-consistency.d.ts +103 -0
- package/dist/pipeline/grader-consistency.js +146 -0
- package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
- package/dist/pipeline/grader-sensitivity-runner.js +282 -0
- package/dist/pipeline/grader-sensitivity.d.ts +94 -0
- package/dist/pipeline/grader-sensitivity.js +144 -0
- package/dist/pipeline/grader-validate-runner.d.ts +38 -0
- package/dist/pipeline/grader-validate-runner.js +229 -0
- package/dist/pipeline/grader-validation.d.ts +107 -0
- package/dist/pipeline/grader-validation.js +169 -0
- package/dist/pipeline/map-request-to-config.d.ts +19 -0
- package/dist/pipeline/map-request-to-config.js +80 -0
- package/dist/pipeline/measure-retrieval.d.ts +59 -0
- package/dist/pipeline/measure-retrieval.js +111 -0
- package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
- package/dist/pipeline/mirror-repo-tasks.js +350 -0
- package/dist/pipeline/plan-format.d.ts +33 -0
- package/dist/pipeline/plan-format.js +202 -0
- package/dist/pipeline/plan.d.ts +169 -0
- package/dist/pipeline/plan.js +708 -0
- package/dist/pipeline/pr-comment.d.ts +19 -0
- package/dist/pipeline/pr-comment.js +502 -0
- package/dist/pipeline/probe.d.ts +52 -0
- package/dist/pipeline/probe.js +390 -0
- package/dist/pipeline/provenance.d.ts +47 -0
- package/dist/pipeline/provenance.js +146 -0
- package/dist/pipeline/readiness-report.d.ts +87 -0
- package/dist/pipeline/readiness-report.js +205 -0
- package/dist/pipeline/release-classification.d.ts +54 -0
- package/dist/pipeline/release-classification.js +238 -0
- package/dist/pipeline/release-report.d.ts +37 -0
- package/dist/pipeline/release-report.js +222 -0
- package/dist/pipeline/repo-eval-comment.d.ts +37 -0
- package/dist/pipeline/repo-eval-comment.js +165 -0
- package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
- package/dist/pipeline/repo-threshold-evaluator.js +162 -0
- package/dist/pipeline/resolve-mappings.d.ts +35 -0
- package/dist/pipeline/resolve-mappings.js +72 -0
- package/dist/pipeline/retrieval-metrics.d.ts +39 -0
- package/dist/pipeline/retrieval-metrics.js +136 -0
- package/dist/pipeline/reverse-mapping.d.ts +67 -0
- package/dist/pipeline/reverse-mapping.js +88 -0
- package/dist/pipeline/schemas.d.ts +9 -0
- package/dist/pipeline/schemas.js +9 -0
- package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/calculate-scores-step.js +89 -0
- package/dist/pipeline/steps/compare-step.d.ts +18 -0
- package/dist/pipeline/steps/compare-step.js +90 -0
- package/dist/pipeline/steps/eval-step.d.ts +53 -0
- package/dist/pipeline/steps/eval-step.js +347 -0
- package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
- package/dist/pipeline/steps/fetch-docs-step.js +84 -0
- package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
- package/dist/pipeline/steps/generate-configs-step.js +98 -0
- package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
- package/dist/pipeline/steps/grader-consistency-step.js +74 -0
- package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
- package/dist/pipeline/steps/publish-report-step.js +243 -0
- package/dist/pipeline/steps/report-step.d.ts +13 -0
- package/dist/pipeline/steps/report-step.js +56 -0
- package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
- package/dist/pipeline/steps/update-scores-step.js +42 -0
- package/dist/pipeline/targeted-loo.d.ts +88 -0
- package/dist/pipeline/targeted-loo.js +203 -0
- package/dist/pipeline/thresholds.d.ts +27 -0
- package/dist/pipeline/thresholds.js +245 -0
- package/dist/pipeline/types.d.ts +10 -0
- package/dist/pipeline/types.js +10 -0
- package/dist/pipeline/validate.d.ts +67 -0
- package/dist/pipeline/validate.js +406 -0
- package/dist/pipeline/webhook-server.d.ts +37 -0
- package/dist/pipeline/webhook-server.js +133 -0
- package/dist/report-store.d.ts +84 -0
- package/dist/report-store.js +208 -0
- package/dist/sanity/client.d.ts +38 -0
- package/dist/sanity/client.js +86 -0
- package/dist/sanity/portable-text.d.ts +11 -0
- package/dist/sanity/portable-text.js +211 -0
- package/dist/sanity/queries.d.ts +133 -0
- package/dist/sanity/queries.js +300 -0
- package/dist/schedules/digest.d.ts +116 -0
- package/dist/schedules/digest.js +156 -0
- package/dist/schedules/index.d.ts +12 -0
- package/dist/schedules/index.js +10 -0
- package/dist/schedules/loader.d.ts +31 -0
- package/dist/schedules/loader.js +73 -0
- package/dist/schedules/schema.d.ts +9 -0
- package/dist/schedules/schema.js +9 -0
- package/dist/scripts/agent-behavior-report.d.ts +19 -0
- package/dist/scripts/agent-behavior-report.js +315 -0
- package/dist/scripts/baseline.d.ts +43 -0
- package/dist/scripts/baseline.js +267 -0
- package/dist/scripts/calculate-scores.d.ts +166 -0
- package/dist/scripts/calculate-scores.js +1296 -0
- package/dist/scripts/compare.d.ts +22 -0
- package/dist/scripts/compare.js +334 -0
- package/dist/scripts/coverage-audit.d.ts +44 -0
- package/dist/scripts/coverage-audit.js +209 -0
- package/dist/scripts/debug-eval.d.ts +19 -0
- package/dist/scripts/debug-eval.js +73 -0
- package/dist/scripts/discovery-report.d.ts +58 -0
- package/dist/scripts/discovery-report.js +250 -0
- package/dist/scripts/fetch-docs.d.ts +35 -0
- package/dist/scripts/fetch-docs.js +472 -0
- package/dist/scripts/generate-configs.d.ts +66 -0
- package/dist/scripts/generate-configs.js +459 -0
- package/dist/scripts/grader-api.d.ts +27 -0
- package/dist/scripts/grader-api.js +206 -0
- package/dist/scripts/grader-compare.d.ts +22 -0
- package/dist/scripts/grader-compare.js +368 -0
- package/dist/scripts/grader-consistency.d.ts +20 -0
- package/dist/scripts/grader-consistency.js +313 -0
- package/dist/scripts/grader-sensitivity.d.ts +22 -0
- package/dist/scripts/grader-sensitivity.js +354 -0
- package/dist/scripts/grader-validate.d.ts +19 -0
- package/dist/scripts/grader-validate.js +267 -0
- package/dist/scripts/measure-retrieval.d.ts +10 -0
- package/dist/scripts/measure-retrieval.js +145 -0
- package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
- package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
- package/dist/scripts/pipeline.d.ts +76 -0
- package/dist/scripts/pipeline.js +1031 -0
- package/dist/scripts/pr-comment.d.ts +10 -0
- package/dist/scripts/pr-comment.js +510 -0
- package/dist/scripts/readiness-report.d.ts +88 -0
- package/dist/scripts/readiness-report.js +342 -0
- package/dist/scripts/update-quality-scores.d.ts +15 -0
- package/dist/scripts/update-quality-scores.js +184 -0
- package/dist/scripts/validate-task-sources.d.ts +21 -0
- package/dist/scripts/validate-task-sources.js +210 -0
- package/dist/scripts/validate.d.ts +13 -0
- package/dist/scripts/validate.js +79 -0
- package/dist/scripts/webhook-server.d.ts +26 -0
- package/dist/scripts/webhook-server.js +147 -0
- package/dist/scripts/weekly-digest.d.ts +24 -0
- package/dist/scripts/weekly-digest.js +144 -0
- package/dist/sinks/bigquery/index.d.ts +131 -0
- package/dist/sinks/bigquery/index.js +222 -0
- package/dist/sinks/format-slack.d.ts +64 -0
- package/dist/sinks/format-slack.js +306 -0
- package/dist/sinks/index.d.ts +23 -0
- package/dist/sinks/index.js +18 -0
- package/dist/sinks/loader.d.ts +18 -0
- package/dist/sinks/loader.js +82 -0
- package/dist/sinks/retry.d.ts +24 -0
- package/dist/sinks/retry.js +52 -0
- package/dist/sinks/schema.d.ts +9 -0
- package/dist/sinks/schema.js +9 -0
- package/dist/sinks/slack/format.d.ts +65 -0
- package/dist/sinks/slack/format.js +327 -0
- package/dist/sinks/slack/index.d.ts +27 -0
- package/dist/sinks/slack/index.js +78 -0
- package/dist/sinks/slack-sink.d.ts +27 -0
- package/dist/sinks/slack-sink.js +78 -0
- package/dist/sinks/types.d.ts +59 -0
- package/dist/sinks/types.js +44 -0
- package/dist/sinks/webhook/index.d.ts +19 -0
- package/dist/sinks/webhook/index.js +50 -0
- package/dist/sinks/webhook-sink.d.ts +19 -0
- package/dist/sinks/webhook-sink.js +50 -0
- package/dist/sources.d.ts +104 -0
- package/dist/sources.js +292 -0
- package/dist/webhook/budget.d.ts +42 -0
- package/dist/webhook/budget.js +60 -0
- package/dist/webhook/debounce.d.ts +67 -0
- package/dist/webhook/debounce.js +76 -0
- package/dist/webhook/dispatch.d.ts +45 -0
- package/dist/webhook/dispatch.js +84 -0
- package/dist/webhook/eval-request-handler.d.ts +87 -0
- package/dist/webhook/eval-request-handler.js +181 -0
- package/dist/webhook/handler.d.ts +88 -0
- package/dist/webhook/handler.js +203 -0
- package/dist/webhook/index.d.ts +17 -0
- package/dist/webhook/index.js +12 -0
- package/dist/webhook/types.d.ts +109 -0
- package/dist/webhook/types.js +10 -0
- package/package.json +72 -0
- package/tasks/.expanded.agentic.yaml +51 -0
- package/tasks/.expanded.yaml +66 -0
- package/tasks/frameworks.yaml +98 -0
- package/tasks/functions.yaml +51 -0
- package/tasks/groq.yaml +216 -0
- package/tasks/nextjs-live.yaml +62 -0
- package/tasks/studio-setup.yaml +111 -0
- package/tasks/visual-editing.yaml +120 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sinks/bigquery/index.ts
|
|
3
|
+
*
|
|
4
|
+
* BigQuery analytics sink — flattens evaluation reports into rows for
|
|
5
|
+
* the `ailf.reports` and `ailf.area_scores` tables.
|
|
6
|
+
*
|
|
7
|
+
* The BigQuery schema serves as the analytical mirror of the Sanity Content
|
|
8
|
+
* Lake. While Sanity is the system of record for operational queries (Studio
|
|
9
|
+
* dashboards, webhooks, real-time listeners), BigQuery handles the analytical
|
|
10
|
+
* queries that GROQ was never designed for (percentile distributions,
|
|
11
|
+
* cost-per-quality-point trends, cross-source regression detection).
|
|
12
|
+
*
|
|
13
|
+
* Schema evolution: additive-only. New columns with NULLABLE mode don't break
|
|
14
|
+
* existing queries. The `flattenReportRow` and `flattenAreaScoreRows` functions
|
|
15
|
+
* are the single place where schema mapping lives.
|
|
16
|
+
*
|
|
17
|
+
* @see docs/design-docs/report-store/bigquery.md
|
|
18
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
19
|
+
*/
|
|
20
|
+
import type { Report, SinkHealthStatus, SinkResult } from "../../pipeline/types.js";
|
|
21
|
+
import type { ReportSink } from "../types.js";
|
|
22
|
+
/**
 * One row per area per model per report in `ailf.area_scores`.
 *
 * Rows of this shape are what `flattenAreaScoreRows` returns for a report.
 * Field names are snake_case; presumably they mirror the table's column
 * names one-to-one — confirm against the BigQuery table definition.
 */
export interface AreaScoreRow {
  area: string;
  ceiling_score: number;
  code_correctness: number;
  /** Denormalized from the report for BigQuery partitioning and clustering. */
  completed_at: string;
  doc_coverage: number;
  doc_lift: number;
  floor_score: number;
  /** Denormalized from the report for BigQuery partitioning and clustering. */
  mode: string;
  /**
   * Model the scores belong to. When a report only carries aggregate scores
   * (no per-model breakdown), this is the first model in provenance.
   */
  model_id: string;
  report_id: string;
  /** Denormalized from the report for BigQuery partitioning and clustering. */
  source_name: string;
  task_completion: number;
  test_count: number;
  total_cost: number;
  total_score: number;
}
|
|
40
|
+
/** Options for constructing a BigQuerySink instance. */
export interface BigQuerySinkOptions {
  /**
   * Path to a service account JSON credentials file.
   *
   * Optional: when omitted, the sink falls back to Application Default
   * Credentials (ADC).
   */
  credentials?: string;
  /** BigQuery dataset name (e.g., "ailf") */
  dataset: string;
  /** BigQuery project ID (e.g., "ailf-reports") */
  project: string;
}
|
|
49
|
+
/**
 * One row per evaluation run in `ailf.reports`.
 *
 * Fields typed `null | …` are explicitly nullable in the row; every other
 * field is always present.
 */
export interface ReportRow {
  area_count: number;
  areas: string[];
  avg_doc_lift: number;
  avg_score: number;
  completed_at: string;
  duration_ms: number;
  git_branch: null | string;
  git_pr_number: null | number;
  git_repo: null | string;
  git_sha: null | string;
  grader_cost: null | number;
  grader_model: string;
  mode: string;
  model_count: number;
  models: string[];
  promptfoo_url: null | string;
  // NOTE(review): plural name but a scalar string type — presumably a
  // serialized collection of URLs; confirm the encoding with the flattener.
  promptfoo_urls: null | string;
  report_id: string;
  source_base_url: string;
  source_dataset: null | string;
  source_name: string;
  source_perspective: null | string;
  tag: null | string;
  total_cost: null | number;
  trigger_caller_repo: null | string;
  trigger_type: string;
}
|
|
78
|
+
/**
 * BigQuery sink — inserts flattened report rows into BigQuery tables.
 *
 * Transforms the nested `Report` into flat rows suitable for SQL analytics.
 * Two tables are populated per publish:
 * - `ailf.reports` — one row per evaluation run
 * - `ailf.area_scores` — one row per area per model per report
 *
 * The sink creates a BigQuery client lazily on first use. If credentials
 * are not provided, it falls back to Application Default Credentials (ADC).
 */
export declare class BigQuerySink implements ReportSink {
  /** Sink identifier; always the literal `"bigquery"`. */
  readonly name = "bigquery";
  private client;
  private readonly options;
  /**
   * @param options - Target project and dataset, plus an optional path to a
   *   service account credentials file.
   */
  constructor(options: BigQuerySinkOptions);
  /**
   * Health check — validates that BigQuery is reachable and the dataset exists.
   *
   * This catches common misconfigurations early (wrong project, missing
   * credentials, non-existent dataset) before the pipeline runs a full eval.
   *
   * @returns The sink's health status.
   */
  healthCheck(): Promise<SinkHealthStatus>;
  /**
   * Publish a report to BigQuery — inserts into both tables.
   *
   * The report row goes into `ailf.reports` and the area score rows go
   * into `ailf.area_scores`. Both inserts use streaming inserts for
   * near-real-time availability.
   *
   * @param report - The evaluation report to flatten and insert.
   * @returns The result of the publish attempt.
   */
  publish(report: Report): Promise<SinkResult>;
  /** Lazily create the BigQuery client (reused across calls). */
  private getClient;
}
|
|
112
|
+
/**
 * Flatten a Report into rows for the `ailf.area_scores` table.
 *
 * Produces one row per area per model. When per-model data is available
 * (multi-model evaluations), each model's area scores become separate rows.
 * When only aggregate scores exist (single model or no per-model breakdown),
 * the model_id is set to the first model in provenance.
 *
 * Denormalized fields (completed_at, mode, source_name) are included for
 * BigQuery partitioning and clustering efficiency.
 *
 * @param report - The evaluation report to flatten.
 * @returns The flattened rows; may be empty when the report has no scores.
 */
export declare function flattenAreaScoreRows(report: Report): AreaScoreRow[];
|
|
124
|
+
/**
 * Flatten a Report into a single row for the `ailf.reports` table.
 *
 * This is the single place where the nested Report structure maps to
 * the flat BigQuery schema. When the report format evolves, update this
 * function and add new NULLABLE columns to the BigQuery table.
 *
 * @param report - The evaluation report to flatten.
 * @returns The flat row for this report.
 */
export declare function flattenReportRow(report: Report): ReportRow;
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sinks/bigquery/index.ts
|
|
3
|
+
*
|
|
4
|
+
* BigQuery analytics sink — flattens evaluation reports into rows for
|
|
5
|
+
* the `ailf.reports` and `ailf.area_scores` tables.
|
|
6
|
+
*
|
|
7
|
+
* The BigQuery schema serves as the analytical mirror of the Sanity Content
|
|
8
|
+
* Lake. While Sanity is the system of record for operational queries (Studio
|
|
9
|
+
* dashboards, webhooks, real-time listeners), BigQuery handles the analytical
|
|
10
|
+
* queries that GROQ was never designed for (percentile distributions,
|
|
11
|
+
* cost-per-quality-point trends, cross-source regression detection).
|
|
12
|
+
*
|
|
13
|
+
* Schema evolution: additive-only. New columns with NULLABLE mode don't break
|
|
14
|
+
* existing queries. The `flattenReportRow` and `flattenAreaScoreRows` functions
|
|
15
|
+
* are the single place where schema mapping lives.
|
|
16
|
+
*
|
|
17
|
+
* @see docs/design-docs/report-store/bigquery.md
|
|
18
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
19
|
+
*/
|
|
20
|
+
import { BigQuery } from "@google-cloud/bigquery";
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
// BigQuerySink class
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
/**
 * BigQuery sink — inserts flattened report rows into BigQuery tables.
 *
 * Transforms the nested `Report` into flat rows suitable for SQL analytics.
 * Two tables are populated per publish:
 * - `ailf.reports` — one row per evaluation run
 * - `ailf.area_scores` — one row per area per model per report
 *
 * The sink creates a BigQuery client lazily on first use. If credentials
 * are not provided, it falls back to Application Default Credentials (ADC).
 */
export class BigQuerySink {
    /** Sink identifier. */
    name = "bigquery";
    /** Cached BigQuery client; stays `null` until `getClient()` first runs. */
    client = null;
    /** Sink configuration (`project`, `dataset`, optional `credentials`). */
    options;
    /**
     * @param options - Project, dataset and optional credentials file for this sink.
     */
    constructor(options) {
        this.options = options;
    }
    /**
     * Health check — validates that BigQuery is reachable and the dataset exists.
     *
     * This catches common misconfigurations early (wrong project, missing
     * credentials, non-existent dataset) before the pipeline runs a full eval.
     *
     * @returns `{ healthy: true }` when the dataset exists, otherwise
     *   `{ healthy: false, reason }`. Errors are reported through `reason`
     *   rather than thrown.
     */
    async healthCheck() {
        try {
            const client = this.getClient();
            const dataset = client.dataset(this.options.dataset);
            const [exists] = await dataset.exists();
            if (!exists) {
                return {
                    healthy: false,
                    reason: `Dataset "${this.options.dataset}" does not exist in project "${this.options.project}"`,
                };
            }
            return { healthy: true };
        }
        catch (error) {
            return {
                healthy: false,
                reason: `BigQuery health check failed: ${error instanceof Error ? error.message : String(error)}`,
            };
        }
    }
    /**
     * Publish a report to BigQuery — inserts into both tables.
     *
     * The report row goes into `ailf.reports` and the area score rows go
     * into `ailf.area_scores`. Both inserts use streaming inserts for
     * near-real-time availability.
     *
     * The two inserts are separate requests, so the publish is not atomic:
     * when the second insert fails, the `reports` row has already been
     * written. The returned error says so, and includes the per-row reasons
     * the client attaches to the thrown error.
     *
     * @param report - The evaluation report to flatten and insert.
     * @returns `{ status: "success", detail }` or `{ status: "failed", error }`.
     *   Errors are reported through the result rather than thrown.
     */
    async publish(report) {
        // Tracks whether the first insert went through, so a later failure
        // can be reported as a partial publish.
        let reportRowInserted = false;
        try {
            const client = this.getClient();
            const dataset = client.dataset(this.options.dataset);
            const reportRow = flattenReportRow(report);
            const areaRows = flattenAreaScoreRows(report);
            // Insert report row
            const reportsTable = dataset.table("reports");
            await reportsTable.insert([reportRow]);
            reportRowInserted = true;
            // Insert area score rows (may be empty if no scores)
            if (areaRows.length > 0) {
                const areaScoresTable = dataset.table("area_scores");
                await areaScoresTable.insert(areaRows);
            }
            const totalRows = 1 + areaRows.length;
            return {
                detail: `${totalRows} row${totalRows === 1 ? "" : "s"} inserted (1 report + ${areaRows.length} area scores)`,
                status: "success",
            };
        }
        catch (error) {
            const partialNote = reportRowInserted
                ? ` (the "reports" row was already inserted; "area_scores" rows are missing or incomplete)`
                : "";
            return {
                error: `BigQuery insert failed: ${describeInsertError(error)}${partialNote}`,
                status: "failed",
            };
        }
    }
    // -----------------------------------------------------------------------
    // Private helpers
    // -----------------------------------------------------------------------
    /** Lazily create the BigQuery client (reused across calls). */
    getClient() {
        this.client ??= new BigQuery({
            keyFilename: this.options.credentials ?? undefined,
            projectId: this.options.project,
        });
        return this.client;
    }
}
/**
 * Build a human-readable description of an error thrown by a table insert.
 *
 * Starts from the error's own message and appends any nested reasons found
 * on an `errors` array, which is where the BigQuery client puts per-row
 * failures. Nested entries may be either `{ errors: [{ message, reason }] }`
 * (per-row shape) or `{ message, reason }` directly; both are handled.
 * Details already contained in the top-level message are not repeated, and
 * at most five distinct details are listed.
 *
 * @param error - Whatever was thrown; need not be an `Error`.
 * @returns The message, optionally followed by ` [detail; detail; ...]`.
 */
function describeInsertError(error) {
    const maxDetails = 5;
    const base = error instanceof Error ? error.message : String(error);
    const nested = error !== null && typeof error === "object" && Array.isArray(error.errors)
        ? error.errors
        : [];
    // A Set keeps insertion order and drops repeated details.
    const details = new Set();
    for (const entry of nested) {
        const causes = Array.isArray(entry?.errors) ? entry.errors : [entry];
        for (const cause of causes) {
            const message = cause?.message;
            if (typeof message !== "string" || message.length === 0 || base.includes(message)) {
                continue;
            }
            const reason = cause.reason;
            details.add(typeof reason === "string" && reason.length > 0 ? `${reason}: ${message}` : message);
        }
    }
    if (details.size === 0) {
        return base;
    }
    const all = Array.from(details);
    const shown = all.slice(0, maxDetails);
    const hidden = all.length - shown.length;
    const suffix = hidden > 0 ? `; +${hidden} more` : "";
    return `${base} [${shown.join("; ")}${suffix}]`;
}
|
|
114
|
+
// ---------------------------------------------------------------------------
|
|
115
|
+
// Flattening functions (the schema mapping layer)
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
117
|
+
/**
 * Flatten a Report into rows for the `ailf.area_scores` table.
 *
 * Produces one row per area per model. When per-model data is available
 * (multi-model evaluations), each model's area scores become separate rows.
 * When only aggregate scores exist (single model or no per-model breakdown),
 * the model_id is set to the first model in provenance, or `"unknown"` when
 * provenance lists no models.
 *
 * Denormalized fields (completed_at, mode, source_name) are included for
 * BigQuery partitioning and clustering efficiency.
 *
 * @param report - The evaluation report to flatten.
 * @returns The flattened rows; empty when the report has no scores.
 */
export function flattenAreaScoreRows(report) {
    const { provenance, summary } = report;
    const rows = [];
    // Common denormalized fields for partitioning/clustering
    const common = {
        completed_at: report.completedAt,
        mode: provenance.mode,
        report_id: report.id,
        source_name: provenance.source.name,
    };
    // When per-model data is available, use it for model-level granularity
    if (summary.perModel && summary.perModel.length > 0) {
        for (const modelEntry of summary.perModel) {
            for (const score of modelEntry.scores) {
                rows.push(toAreaScoreRow(common, modelEntry.modelId, score));
            }
        }
        return rows;
    }
    // Fallback: use aggregate scores with the first model ID
    const modelId = provenance.models[0]?.id ?? "unknown";
    for (const score of summary.scores) {
        rows.push(toAreaScoreRow(common, modelId, score));
    }
    return rows;
}
/**
 * Map one area score onto the `ailf.area_scores` column names.
 *
 * Both the per-model and the aggregate path go through this helper so the
 * score-to-column mapping is defined exactly once.
 *
 * @param common - Denormalized report-level fields copied onto every row.
 * @param modelId - Model the score is attributed to.
 * @param score - Area score entry from the report summary.
 * @returns A single flat row.
 */
function toAreaScoreRow(common, modelId, score) {
    return {
        ...common,
        area: score.feature,
        ceiling_score: score.ceilingScore,
        code_correctness: score.codeCorrectness,
        doc_coverage: score.docCoverage,
        doc_lift: score.docLift,
        floor_score: score.floorScore,
        model_id: modelId,
        task_completion: score.taskCompletion,
        test_count: score.testCount,
        total_cost: score.totalCost,
        total_score: score.totalScore,
    };
}
|
|
181
|
+
/**
 * Flatten a Report into a single row for the `ailf.reports` table.
 *
 * This is the single place where the nested Report structure maps to
 * the flat BigQuery schema. When the report format evolves, update this
 * function and add new NULLABLE columns to the BigQuery table.
 *
 * Every optional value is normalised to `null` when absent, so the row
 * never carries `undefined`.
 *
 * @param report - The evaluation report to flatten.
 * @returns The flat row for this report.
 */
export function flattenReportRow(report) {
    const { provenance, summary } = report;
    const { git, source, trigger } = provenance;
    const cost = summary.overall.cost;
    return {
        area_count: provenance.areas.length,
        areas: provenance.areas,
        avg_doc_lift: summary.overall.avgDocLift,
        avg_score: summary.overall.avgScore,
        completed_at: report.completedAt,
        duration_ms: report.durationMs,
        git_branch: git?.branch ?? null,
        git_pr_number: git?.prNumber ?? null,
        git_repo: git?.repo ?? null,
        git_sha: git?.sha ?? null,
        grader_cost: cost?.graderTotal ?? null,
        grader_model: provenance.graderModel,
        mode: provenance.mode,
        model_count: provenance.models.length,
        models: provenance.models.map((m) => m.id),
        promptfoo_url: provenance.promptfooUrl ?? null,
        // Stored as a JSON string rather than a nested value.
        promptfoo_urls: provenance.promptfooUrls
            ? JSON.stringify(provenance.promptfooUrls)
            : null,
        report_id: report.id,
        source_base_url: source.baseUrl,
        source_dataset: source.dataset ?? null,
        source_name: source.name,
        source_perspective: source.perspective ?? null,
        tag: report.tag ?? null,
        total_cost: cost?.total ?? null,
        // Only cross-repo triggers carry a caller repo; a cross-repo trigger
        // that lacks one still yields null, like every other nullable column.
        trigger_caller_repo: trigger.type === "cross-repo"
            ? (trigger.callerRepo ?? null)
            : null,
        trigger_type: trigger.type,
    };
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sinks/format-slack.ts
|
|
3
|
+
*
|
|
4
|
+
* Formats evaluation report data into Slack Block Kit structures for the
|
|
5
|
+
* SlackSink. Provides two message formats:
|
|
6
|
+
*
|
|
7
|
+
* - `formatRegressionAlert` — detailed regression notification with
|
|
8
|
+
* per-area dimension breakdowns
|
|
9
|
+
* - `formatScoreSummary` — compact score overview for general reporting
|
|
10
|
+
*
|
|
11
|
+
* @see docs/design-docs/report-store/sink-architecture.md
|
|
12
|
+
*/
|
|
13
|
+
import type { Report } from "../pipeline/types.js";
|
|
14
|
+
import type { DigestSummary } from "../schedules/digest.js";
|
|
15
|
+
/**
 * Message shape returned by the formatters in this module: a list of
 * Block Kit blocks plus a `text` string.
 */
export interface SlackMessage {
    /** Block Kit blocks that make up the message body. */
    blocks: SlackBlock[];
    /** NOTE(review): presumably the plain-text fallback shown where blocks cannot render — confirm against SlackSink. */
    text: string;
}
|
|
19
|
+
/**
 * A single Block Kit block. Only the four block types listed in `type`
 * are modelled; `elements`, `fields` and `text` are all optional.
 * NOTE(review): which of them applies to which block type is not encoded here — confirm against the formatters.
 */
interface SlackBlock {
    /** Optional list of text objects (`mrkdwn` or `plain_text`). */
    elements?: Array<{
        text: string;
        type: "mrkdwn" | "plain_text";
    }>;
    /** Optional list of text objects (`mrkdwn` or `plain_text`). */
    fields?: Array<{
        text: string;
        type: "mrkdwn" | "plain_text";
    }>;
    /** Optional single text object for the block. */
    text?: {
        text: string;
        type: "mrkdwn" | "plain_text";
    };
    /** Block type discriminator. */
    type: "context" | "divider" | "header" | "section";
}
|
|
34
|
+
/**
 * Format a regression alert for areas that have regressed.
 *
 * Produces a rich Slack message with:
 * - Header with overall score change
 * - Context metadata (mode, source, timestamp, promptfoo link)
 * - Per-area regression details with dimension breakdowns
 * - Brief mentions of improved and unchanged areas
 *
 * @param report - The evaluation report to describe.
 * @returns The Slack message (blocks plus text) for the alert.
 */
export declare function formatRegressionAlert(report: Report): SlackMessage;
|
|
44
|
+
/**
 * Format a general score summary for Slack reporting.
 *
 * Produces a compact overview with:
 * - Overall score with grade emoji
 * - Per-area score table
 * - Cost summary (if available)
 * - Promptfoo link (if available)
 *
 * @param report - The evaluation report to summarise.
 * @returns The Slack message (blocks plus text) for the summary.
 */
export declare function formatScoreSummary(report: Report): SlackMessage;
|
|
54
|
+
/**
 * Format a weekly digest summary for Slack.
 *
 * Produces a summary message covering score trends over a time window:
 * - Header with overall trend direction and score
 * - Per-area trend table with arrows
 * - Lists of improved, regressed, and stable areas
 * - Report count and time window metadata
 *
 * @param digest - The digest summary to render.
 * @returns The Slack message (blocks plus text) for the digest.
 */
export declare function formatWeeklyDigest(digest: DigestSummary): SlackMessage;
|
|
64
|
+
export {};
|