@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530)
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,131 @@
1
+ /**
2
+ * sinks/bigquery/index.ts
3
+ *
4
+ * BigQuery analytics sink — flattens evaluation reports into rows for
5
+ * the `ailf.reports` and `ailf.area_scores` tables.
6
+ *
7
+ * The BigQuery schema serves as the analytical mirror of the Sanity Content
8
+ * Lake. While Sanity is the system of record for operational queries (Studio
9
+ * dashboards, webhooks, real-time listeners), BigQuery handles the analytical
10
+ * queries that GROQ was never designed for (percentile distributions,
11
+ * cost-per-quality-point trends, cross-source regression detection).
12
+ *
13
+ * Schema evolution: additive-only. New columns with NULLABLE mode don't break
14
+ * existing queries. The `flattenReportRow` and `flattenAreaScoreRows` functions
15
+ * are the single place where schema mapping lives.
16
+ *
17
+ * @see docs/design-docs/report-store/bigquery.md
18
+ * @see docs/design-docs/report-store/sink-architecture.md
19
+ */
20
+ import type { Report, SinkHealthStatus, SinkResult } from "../../pipeline/types.js";
21
+ import type { ReportSink } from "../types.js";
22
/**
 * Shape of a single `ailf.area_scores` row: one per area, per model, per report.
 *
 * Field order carries no meaning; members are grouped by concern.
 */
export interface AreaScoreRow {
    // --- Row identity -----------------------------------------------------
    /** ID of the report this row was flattened from. */
    report_id: string;
    /** Model the scores belong to. */
    model_id: string;
    /** Area name (taken from the score entry's `feature`). */
    area: string;

    // --- Denormalized report fields (copied onto every row) ---------------
    completed_at: string;
    mode: string;
    source_name: string;

    // --- Score dimensions -------------------------------------------------
    total_score: number;
    ceiling_score: number;
    floor_score: number;
    task_completion: number;
    code_correctness: number;
    doc_coverage: number;
    doc_lift: number;

    // --- Volume and cost --------------------------------------------------
    test_count: number;
    total_cost: number;
}
40
/** Configuration accepted by the `BigQuerySink` constructor. */
export interface BigQuerySinkOptions {
    /** BigQuery project ID (e.g., "ailf-reports") */
    project: string;
    /** BigQuery dataset name (e.g., "ailf") */
    dataset: string;
    /** Optional path to a service account JSON credentials file */
    credentials?: string;
}
49
/**
 * Shape of a single `ailf.reports` row: one per evaluation run.
 *
 * Nullable members are the ones the report may legitimately lack; they are
 * written as `null` rather than omitted.
 */
export interface ReportRow {
    // --- Identity and timing ----------------------------------------------
    report_id: string;
    tag: string | null;
    completed_at: string;
    duration_ms: number;
    mode: string;

    // --- What was evaluated -----------------------------------------------
    areas: string[];
    area_count: number;
    models: string[];
    model_count: number;
    grader_model: string;

    // --- Aggregate results ------------------------------------------------
    avg_score: number;
    avg_doc_lift: number;
    total_cost: number | null;
    grader_cost: number | null;

    // --- Documentation source ---------------------------------------------
    source_name: string;
    source_base_url: string;
    source_dataset: string | null;
    source_perspective: string | null;

    // --- Git context ------------------------------------------------------
    git_repo: string | null;
    git_branch: string | null;
    git_sha: string | null;
    git_pr_number: number | null;

    // --- Trigger ----------------------------------------------------------
    trigger_type: string;
    trigger_caller_repo: string | null;

    // --- Promptfoo links --------------------------------------------------
    promptfoo_url: string | null;
    /** String-encoded collection of URLs (stored as a single string column). */
    promptfoo_urls: string | null;
}
78
/**
 * Report sink that writes flattened evaluation data to BigQuery.
 *
 * Each publish turns the nested `Report` into flat rows and targets two
 * tables:
 *   - `ailf.reports`      — a single row for the evaluation run
 *   - `ailf.area_scores`  — one row for every area/model pair in the report
 *
 * The underlying BigQuery client is created on first use. Without an explicit
 * credentials path the sink relies on Application Default Credentials (ADC).
 */
export declare class BigQuerySink implements ReportSink {
    readonly name = "bigquery";
    private client;
    private readonly options;
    constructor(options: BigQuerySinkOptions);
    /**
     * Verify that BigQuery can be reached and that the configured dataset
     * exists.
     *
     * Intended to surface misconfiguration (wrong project, missing
     * credentials, non-existent dataset) before a full evaluation is run.
     */
    healthCheck(): Promise<SinkHealthStatus>;
    /**
     * Write a report to BigQuery.
     *
     * One row is inserted into `ailf.reports` and the per-area rows into
     * `ailf.area_scores`, both via streaming inserts so the data is queryable
     * in near real time.
     */
    publish(report: Report): Promise<SinkResult>;
    /** Return the cached BigQuery client, creating it on first call. */
    private getClient;
}
112
/**
 * Convert a Report into the rows destined for `ailf.area_scores`.
 *
 * The result has one row per area per model:
 *   - with per-model data (multi-model evaluations), every model contributes
 *     its own row for each of its area scores;
 *   - with aggregate scores only (single model, or no per-model breakdown),
 *     each row's `model_id` is the first model listed in provenance.
 *
 * `completed_at`, `mode` and `source_name` are repeated on every row
 * (denormalized) to support BigQuery partitioning and clustering.
 */
export declare function flattenAreaScoreRows(report: Report): AreaScoreRow[];
124
/**
 * Convert a Report into its single `ailf.reports` row.
 *
 * The mapping from the nested Report structure to the flat BigQuery schema
 * lives here and nowhere else. When the report format grows, extend this
 * function and add the matching NULLABLE column to the BigQuery table.
 */
export declare function flattenReportRow(report: Report): ReportRow;
@@ -0,0 +1,222 @@
1
+ /**
2
+ * sinks/bigquery/index.ts
3
+ *
4
+ * BigQuery analytics sink — flattens evaluation reports into rows for
5
+ * the `ailf.reports` and `ailf.area_scores` tables.
6
+ *
7
+ * The BigQuery schema serves as the analytical mirror of the Sanity Content
8
+ * Lake. While Sanity is the system of record for operational queries (Studio
9
+ * dashboards, webhooks, real-time listeners), BigQuery handles the analytical
10
+ * queries that GROQ was never designed for (percentile distributions,
11
+ * cost-per-quality-point trends, cross-source regression detection).
12
+ *
13
+ * Schema evolution: additive-only. New columns with NULLABLE mode don't break
14
+ * existing queries. The `flattenReportRow` and `flattenAreaScoreRows` functions
15
+ * are the single place where schema mapping lives.
16
+ *
17
+ * @see docs/design-docs/report-store/bigquery.md
18
+ * @see docs/design-docs/report-store/sink-architecture.md
19
+ */
20
+ import { BigQuery } from "@google-cloud/bigquery";
21
+ // ---------------------------------------------------------------------------
22
+ // BigQuerySink class
23
+ // ---------------------------------------------------------------------------
24
+ /**
25
+ * BigQuery sink — inserts flattened report rows into BigQuery tables.
26
+ *
27
+ * Transforms the nested `Report` into flat rows suitable for SQL analytics.
28
+ * Two tables are populated per publish:
29
+ * - `ailf.reports` — one row per evaluation run
30
+ * - `ailf.area_scores` — one row per area per model per report
31
+ *
32
+ * The sink creates a BigQuery client lazily on first use. If credentials
33
+ * are not provided, it falls back to Application Default Credentials (ADC).
34
+ */
35
+ export class BigQuerySink {
36
+ name = "bigquery";
37
+ client = null;
38
+ options;
39
+ constructor(options) {
40
+ this.options = options;
41
+ }
42
+ /**
43
+ * Health check — validates that BigQuery is reachable and the dataset exists.
44
+ *
45
+ * This catches common misconfigurations early (wrong project, missing
46
+ * credentials, non-existent dataset) before the pipeline runs a full eval.
47
+ */
48
+ async healthCheck() {
49
+ try {
50
+ const client = this.getClient();
51
+ const dataset = client.dataset(this.options.dataset);
52
+ const [exists] = await dataset.exists();
53
+ if (!exists) {
54
+ return {
55
+ healthy: false,
56
+ reason: `Dataset "${this.options.dataset}" does not exist in project "${this.options.project}"`,
57
+ };
58
+ }
59
+ return { healthy: true };
60
+ }
61
+ catch (error) {
62
+ return {
63
+ healthy: false,
64
+ reason: `BigQuery health check failed: ${error instanceof Error ? error.message : String(error)}`,
65
+ };
66
+ }
67
+ }
68
+ /**
69
+ * Publish a report to BigQuery — inserts into both tables.
70
+ *
71
+ * The report row goes into `ailf.reports` and the area score rows go
72
+ * into `ailf.area_scores`. Both inserts use streaming inserts for
73
+ * near-real-time availability.
74
+ */
75
+ async publish(report) {
76
+ try {
77
+ const client = this.getClient();
78
+ const dataset = client.dataset(this.options.dataset);
79
+ const reportRow = flattenReportRow(report);
80
+ const areaRows = flattenAreaScoreRows(report);
81
+ // Insert report row
82
+ const reportsTable = dataset.table("reports");
83
+ await reportsTable.insert([reportRow]);
84
+ // Insert area score rows (may be empty if no scores)
85
+ if (areaRows.length > 0) {
86
+ const areaScoresTable = dataset.table("area_scores");
87
+ await areaScoresTable.insert(areaRows);
88
+ }
89
+ const totalRows = 1 + areaRows.length;
90
+ return {
91
+ detail: `${totalRows} row${totalRows === 1 ? "" : "s"} inserted (1 report + ${areaRows.length} area scores)`,
92
+ status: "success",
93
+ };
94
+ }
95
+ catch (error) {
96
+ return {
97
+ error: `BigQuery insert failed: ${error instanceof Error ? error.message : String(error)}`,
98
+ status: "failed",
99
+ };
100
+ }
101
+ }
102
+ // -----------------------------------------------------------------------
103
+ // Private helpers
104
+ // -----------------------------------------------------------------------
105
+ /** Lazily create the BigQuery client (reused across calls). */
106
+ getClient() {
107
+ this.client ??= new BigQuery({
108
+ keyFilename: this.options.credentials ?? undefined,
109
+ projectId: this.options.project,
110
+ });
111
+ return this.client;
112
+ }
113
+ }
114
+ // ---------------------------------------------------------------------------
115
+ // Flattening functions (the schema mapping layer)
116
+ // ---------------------------------------------------------------------------
117
+ /**
118
+ * Flatten a Report into rows for the `ailf.area_scores` table.
119
+ *
120
+ * Produces one row per area per model. When per-model data is available
121
+ * (multi-model evaluations), each model's area scores become separate rows.
122
+ * When only aggregate scores exist (single model or no per-model breakdown),
123
+ * the model_id is set to the first model in provenance.
124
+ *
125
+ * Denormalized fields (completed_at, mode, source_name) are included for
126
+ * BigQuery partitioning and clustering efficiency.
127
+ */
128
+ export function flattenAreaScoreRows(report) {
129
+ const { provenance, summary } = report;
130
+ const rows = [];
131
+ // Common denormalized fields for partitioning/clustering
132
+ const common = {
133
+ completed_at: report.completedAt,
134
+ mode: provenance.mode,
135
+ report_id: report.id,
136
+ source_name: provenance.source.name,
137
+ };
138
+ // When per-model data is available, use it for model-level granularity
139
+ if (summary.perModel && summary.perModel.length > 0) {
140
+ for (const modelEntry of summary.perModel) {
141
+ for (const score of modelEntry.scores) {
142
+ rows.push({
143
+ ...common,
144
+ area: score.feature,
145
+ ceiling_score: score.ceilingScore,
146
+ code_correctness: score.codeCorrectness,
147
+ doc_coverage: score.docCoverage,
148
+ doc_lift: score.docLift,
149
+ floor_score: score.floorScore,
150
+ model_id: modelEntry.modelId,
151
+ task_completion: score.taskCompletion,
152
+ test_count: score.testCount,
153
+ total_cost: score.totalCost,
154
+ total_score: score.totalScore,
155
+ });
156
+ }
157
+ }
158
+ }
159
+ else {
160
+ // Fallback: use aggregate scores with the first model ID
161
+ const modelId = provenance.models[0]?.id ?? "unknown";
162
+ for (const score of summary.scores) {
163
+ rows.push({
164
+ ...common,
165
+ area: score.feature,
166
+ ceiling_score: score.ceilingScore,
167
+ code_correctness: score.codeCorrectness,
168
+ doc_coverage: score.docCoverage,
169
+ doc_lift: score.docLift,
170
+ floor_score: score.floorScore,
171
+ model_id: modelId,
172
+ task_completion: score.taskCompletion,
173
+ test_count: score.testCount,
174
+ total_cost: score.totalCost,
175
+ total_score: score.totalScore,
176
+ });
177
+ }
178
+ }
179
+ return rows;
180
+ }
181
+ /**
182
+ * Flatten a Report into a single row for the `ailf.reports` table.
183
+ *
184
+ * This is the single place where the nested Report structure maps to
185
+ * the flat BigQuery schema. When the report format evolves, update this
186
+ * function and add new NULLABLE columns to the BigQuery table.
187
+ */
188
+ export function flattenReportRow(report) {
189
+ const { provenance, summary } = report;
190
+ return {
191
+ area_count: provenance.areas.length,
192
+ areas: provenance.areas,
193
+ avg_doc_lift: summary.overall.avgDocLift,
194
+ avg_score: summary.overall.avgScore,
195
+ completed_at: report.completedAt,
196
+ duration_ms: report.durationMs,
197
+ git_branch: provenance.git?.branch ?? null,
198
+ git_pr_number: provenance.git?.prNumber ?? null,
199
+ git_repo: provenance.git?.repo ?? null,
200
+ git_sha: provenance.git?.sha ?? null,
201
+ grader_cost: summary.overall.cost?.graderTotal ?? null,
202
+ grader_model: provenance.graderModel,
203
+ mode: provenance.mode,
204
+ model_count: provenance.models.length,
205
+ models: provenance.models.map((m) => m.id),
206
+ promptfoo_url: provenance.promptfooUrl ?? null,
207
+ promptfoo_urls: provenance.promptfooUrls
208
+ ? JSON.stringify(provenance.promptfooUrls)
209
+ : null,
210
+ report_id: report.id,
211
+ source_base_url: provenance.source.baseUrl,
212
+ source_dataset: provenance.source.dataset ?? null,
213
+ source_name: provenance.source.name,
214
+ source_perspective: provenance.source.perspective ?? null,
215
+ tag: report.tag ?? null,
216
+ total_cost: summary.overall.cost?.total ?? null,
217
+ trigger_caller_repo: provenance.trigger.type === "cross-repo"
218
+ ? provenance.trigger.callerRepo
219
+ : null,
220
+ trigger_type: provenance.trigger.type,
221
+ };
222
+ }
@@ -0,0 +1,64 @@
1
+ /**
2
+ * sinks/format-slack.ts
3
+ *
4
+ * Formats evaluation report data into Slack Block Kit structures for the
5
+ * SlackSink. Provides three message formats (the two below plus `formatWeeklyDigest`, a weekly trend digest):
6
+ *
7
+ * - `formatRegressionAlert` — detailed regression notification with
8
+ * per-area dimension breakdowns
9
+ * - `formatScoreSummary` — compact score overview for general reporting
10
+ *
11
+ * @see docs/design-docs/report-store/sink-architecture.md
12
+ */
13
+ import type { Report } from "../pipeline/types.js";
14
+ import type { DigestSummary } from "../schedules/digest.js";
15
/** A Slack message payload: Block Kit blocks plus a plain `text` string. */
export interface SlackMessage {
    text: string;
    blocks: SlackBlock[];
}
19
/** Text object used inside a block: `mrkdwn` or `plain_text` content. */
type SlackTextObject = {
    text: string;
    type: "mrkdwn" | "plain_text";
};
/**
 * The subset of Slack Block Kit block shapes used by this module.
 *
 * Only `type` is required; `elements`, `fields` and `text` are optional and
 * all carry the same text-object shape.
 */
interface SlackBlock {
    type: "context" | "divider" | "header" | "section";
    text?: SlackTextObject;
    fields?: SlackTextObject[];
    elements?: SlackTextObject[];
}
34
/**
 * Build the Slack alert sent when areas have regressed.
 *
 * The resulting rich message contains:
 *   - a header showing the overall score change
 *   - context metadata (mode, source, timestamp, promptfoo link)
 *   - regression details for each affected area, broken down by dimension
 *   - short mentions of the areas that improved or stayed unchanged
 */
export declare function formatRegressionAlert(report: Report): SlackMessage;
44
/**
 * Build a general-purpose score summary for Slack reporting.
 *
 * The resulting compact overview contains:
 *   - the overall score with a grade emoji
 *   - a per-area score table
 *   - a cost summary, when cost data is available
 *   - a promptfoo link, when one is available
 */
export declare function formatScoreSummary(report: Report): SlackMessage;
54
/**
 * Build the weekly digest message for Slack.
 *
 * The resulting message summarises score trends over a time window:
 *   - a header with the overall trend direction and score
 *   - a per-area trend table with arrows
 *   - lists of improved, regressed and stable areas
 *   - report count and time-window metadata
 */
export declare function formatWeeklyDigest(digest: DigestSummary): SlackMessage;
64
+ export {};