@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530)
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,222 @@
1
+ /**
2
+ * pipeline/release-report.ts
3
+ *
4
+ * Cross-area release impact reports.
5
+ *
6
+ * Phase 4c of the Scenario Matrix implementation.
7
+ *
8
+ * Consolidates impact data from multi-area content releases into a single
9
+ * report that combines document classification (4a), before/after deltas (2b),
10
+ * attribution (2c), and probe results (4b) into the document × area × task
11
+ * impact matrix specified by Scenario 2.4.
12
+ *
13
+ * @see docs/exec-plans/completed/scenario-matrix-implementation/phase-4-content-release-integration.md
14
+ */
15
+ // ---------------------------------------------------------------------------
16
+ // Public API
17
+ // ---------------------------------------------------------------------------
18
/**
 * Build a cross-area release impact report.
 *
 * Merges document classification, the before/after comparison, per-task
 * attribution, and probe results into one consolidated report object.
 *
 * @param classification - Document classification from Phase 4a; its
 *   `documents` are passed through and their `affectedAreas` determine which
 *   areas count as "not evaluated"
 * @param comparison - Before/after comparison (optional); without it the
 *   report has no areas and an overall delta of 0
 * @param attribution - Per-document attribution (optional); entries are
 *   matched to comparison areas through their `area` field
 * @param probes - Tier B probe results (optional, defaults to an empty list)
 * @param noiseThreshold - Threshold for "unchanged" classification; falls
 *   back to `comparison.noiseThreshold`, then to 2
 * @returns Consolidated release impact report
 */
export function buildReleaseImpactReport(classification, comparison, attribution, probes, noiseThreshold) {
  const noiseBand = noiseThreshold ?? comparison?.noiseThreshold ?? 2;

  // One impact entry per compared area, each carrying its attributed tasks.
  const areas = (comparison ? comparison.areas : []).map((entry) => {
    const attributedTasks = attribution
      ? attribution.attributions
          .filter((item) => item.area === entry.area)
          .map((item) => ({
            attributedDocs: item.attributedDocs,
            delta: item.delta,
            taskId: item.taskId,
          }))
      : [];
    // Without attribution data the area itself stands in as a single task row.
    const tasks = attributedTasks.length > 0
      ? attributedTasks
      : [
          {
            attributedDocs: [],
            delta: entry.delta,
            taskId: `${entry.area} (area-level)`,
          },
        ];
    return {
      area: entry.area,
      delta: entry.delta,
      regressed: entry.delta < -noiseBand,
      tasks,
    };
  });

  const regressions = areas
    .filter((entry) => entry.regressed)
    .map((entry) => entry.area);
  // "Unchanged" means the delta stayed inside the noise band in either direction.
  const confirmedUnchanged = areas
    .filter((entry) => Math.abs(entry.delta) <= noiseBand)
    .map((entry) => entry.area);

  // Areas touched by the release documents that never showed up in the comparison.
  const seenAreas = new Set(areas.map((entry) => entry.area));
  const touchedAreas = new Set(classification.documents.flatMap((doc) => doc.affectedAreas));
  const notEvaluated = [...touchedAreas].filter((name) => !seenAreas.has(name));
  notEvaluated.sort();

  return {
    areas,
    confirmedUnchanged,
    documents: classification.documents,
    generatedAt: new Date().toISOString(),
    notEvaluated,
    overallDelta: comparison ? comparison.deltas.overall : 0,
    probes: probes ?? [],
    regressions,
  };
}
93
+ // ---------------------------------------------------------------------------
94
+ // Formatting
95
+ // ---------------------------------------------------------------------------
96
/**
 * Format a release impact report for console output.
 *
 * Produces, in order: a banner, the headline numbers, any regressions, the
 * document/area/task matrix, probe results, and the unchanged /
 * not-evaluated area lists. Sections with nothing to show are omitted.
 *
 * @param report - Release impact report (the shape returned by
 *   buildReleaseImpactReport)
 * @returns Newline-joined plain-text report
 */
export function formatReleaseImpactConsole(report) {
  // Deltas print with one decimal and an explicit "+" for non-negative values.
  const signed = (value) => (value >= 0 ? `+${value.toFixed(1)}` : value.toFixed(1));
  const banner = "═══════════════════════════════════════════════════════════════";
  const out = [banner, " CONTENT RELEASE IMPACT REPORT", banner, ""];

  out.push(
    ` Overall impact: ${signed(report.overallDelta)} points`,
    ` Documents: ${report.documents.length}`,
    ` Areas affected: ${report.areas.length}`,
    "",
  );

  // Regressions
  if (report.regressions.length > 0) {
    out.push(" ⚠️ REGRESSIONS DETECTED:");
    for (const name of report.regressions) {
      const hit = report.areas.find((entry) => entry.area === name);
      // A regression name with no matching area entry is silently skipped.
      if (!hit) {
        continue;
      }
      out.push(` ${name}: ${hit.delta.toFixed(1)}`);
    }
    out.push("");
  }

  // Impact matrix: one row per (area, task) pair
  if (report.areas.length > 0) {
    out.push(" Document | Area | Task | Delta");
    out.push(" ────────────────────────────────┼──────────────────┼─────────────────────────┼──────");
    for (const { area: areaName, tasks } of report.areas) {
      for (const { attributedDocs, taskId, delta } of tasks) {
        const docCell = attributedDocs.length > 0 ? attributedDocs.join(", ") : "(unattributed)";
        out.push(` ${docCell.padEnd(32)} | ${areaName.padEnd(16)} | ${taskId.padEnd(23)} | ${signed(delta)}`);
      }
    }
    out.push("");
  }

  // Probe results
  if (report.probes.length > 0) {
    out.push(" TIER B PROBES (directional only):");
    for (const { documentSlugs, usability } of report.probes) {
      out.push(` ${documentSlugs.join(", ")}: ${usability}`);
    }
    out.push("");
  }

  // Unchanged and not-evaluated areas share one trailing blank line
  const hasUnchanged = report.confirmedUnchanged.length > 0;
  if (hasUnchanged) {
    out.push(` Areas with no score change: ${report.confirmedUnchanged.join(", ")}`);
  }
  const hasNotEvaluated = report.notEvaluated.length > 0;
  if (hasNotEvaluated) {
    out.push(` Areas not evaluated: ${report.notEvaluated.join(", ")}`);
  }
  if (hasUnchanged || hasNotEvaluated) {
    out.push("");
  }
  return out.join("\n");
}
158
/**
 * Format a release impact report as markdown.
 *
 * Emits a heading, a one-line summary, and then optional sections for
 * regressions, the impact matrix table, probe results, and a status line
 * listing unchanged / not-evaluated areas.
 *
 * @param report - Release impact report (the shape returned by
 *   buildReleaseImpactReport)
 * @returns Newline-joined markdown
 */
export function formatReleaseImpactMarkdown(report) {
  // Deltas print with one decimal and an explicit "+" for non-negative values.
  const signed = (value) => (value >= 0 ? `+${value.toFixed(1)}` : value.toFixed(1));
  const md = [
    "### 📋 Content Release Impact Report",
    "",
    `**Overall impact: ${signed(report.overallDelta)} points** · ${report.documents.length} documents · ${report.areas.length} areas`,
    "",
  ];

  // Regressions
  if (report.regressions.length > 0) {
    md.push("#### ⚠️ Regressions", "");
    for (const name of report.regressions) {
      const hit = report.areas.find((entry) => entry.area === name);
      // A regression name with no matching area entry is silently skipped.
      if (!hit) {
        continue;
      }
      md.push(`- **${name}**: ${hit.delta.toFixed(1)}`);
    }
    md.push("");
  }

  // Impact table: one row per (area, task) pair
  if (report.areas.length > 0) {
    md.push(
      "#### Impact Matrix",
      "",
      "| Document | Area | Task | Delta |",
      "|----------|------|------|-------|",
    );
    for (const entry of report.areas) {
      // The warning icon is decided per area, so every task row of a regressed area carries it.
      const warn = entry.regressed ? " ⚠️" : "";
      for (const { attributedDocs, taskId, delta } of entry.tasks) {
        const docCell = attributedDocs.length > 0
          ? attributedDocs.map((slug) => `\`${slug}\``).join(", ")
          : "—";
        md.push(`| ${docCell} | ${entry.area} | ${taskId} | ${signed(delta)}${warn} |`);
      }
    }
    md.push("");
  }

  // Probes
  if (report.probes.length > 0) {
    md.push("#### 🔍 Tier B Probes (directional only)", "");
    for (const { documentSlugs, usability } of report.probes) {
      md.push(`- **${documentSlugs.join(", ")}**: ${usability}`);
    }
    md.push("");
  }

  // Status areas: only non-empty lists contribute a part
  const status = [
    ["**Unchanged:**", report.confirmedUnchanged],
    ["**Not evaluated:**", report.notEvaluated],
  ]
    .filter(([, names]) => names.length > 0)
    .map(([label, names]) => `${label} ${names.join(", ")}`);
  if (status.length > 0) {
    md.push(status.join(" · "), "");
  }
  return md.join("\n");
}
@@ -0,0 +1,37 @@
1
+ /**
2
+ * pipeline/repo-eval-comment.ts
3
+ *
4
+ * Generates markdown PR comments for repo-based AILF evaluations.
5
+ *
6
+ * This is separate from the main pr-comment.ts (which generates the
7
+ * internal AILF repo's PR comment). Repo eval comments are designed
8
+ * for external teams and emphasize:
9
+ *
10
+ * - Documentation quality (not code quality)
11
+ * - Per-task threshold pass/fail status
12
+ * - Clear "what does this mean?" context
13
+ * - skip-ailf bypass instructions
14
+ *
15
+ * @see docs/exec-plans/completed/tasks-as-content/phase-6-pr-quality-gates.md
16
+ * @see packages/eval/src/pipeline/repo-threshold-evaluator.ts
17
+ */
18
+ import type { ComparisonReport, ScoreSummary } from "./types.js";
19
+ import type { RepoThresholdEvaluation } from "./repo-threshold-evaluator.js";
20
+ export interface RepoCommentOptions { // input bundle for generateRepoEvalComment
21
+ /** Threshold evaluation results; drives the status emoji, the headline score, and the per-task / per-dimension tables */
22
+ thresholdEval: RepoThresholdEvaluation;
23
+ /** Score summary from the pipeline; source of the cost, test count, doc lift, and full score breakdown */
24
+ scoreSummary: ScoreSummary;
25
+ /** Comparison report (if baseline exists); the comparison section is skipped when `firstRun` is set */
26
+ comparison?: ComparisonReport;
27
+ /** Link to the full report in Studio; left out of the footer when unset */
28
+ reportUrl?: string;
29
+ /** Link to the Promptfoo results (rendered as "Detailed results"); left out of the footer when unset */
30
+ promptfooUrl?: string;
31
+ /** Whether this is the first run (no baseline to compare against); switches the summary to a first-run message */
32
+ firstRun?: boolean;
33
+ }
34
+ /**
35
+ * Generate a markdown PR comment for a repo-based evaluation.
+ *
+ * @param options - Threshold results, score summary, and optional comparison / link data
+ * @returns The complete comment body as a newline-joined markdown string
36
+ */
37
+ export declare function generateRepoEvalComment(options: RepoCommentOptions): string;
@@ -0,0 +1,165 @@
1
+ /**
2
+ * pipeline/repo-eval-comment.ts
3
+ *
4
+ * Generates markdown PR comments for repo-based AILF evaluations.
5
+ *
6
+ * This is separate from the main pr-comment.ts (which generates the
7
+ * internal AILF repo's PR comment). Repo eval comments are designed
8
+ * for external teams and emphasize:
9
+ *
10
+ * - Documentation quality (not code quality)
11
+ * - Per-task threshold pass/fail status
12
+ * - Clear "what does this mean?" context
13
+ * - skip-ailf bypass instructions
14
+ *
15
+ * @see docs/exec-plans/completed/tasks-as-content/phase-6-pr-quality-gates.md
16
+ * @see packages/eval/src/pipeline/repo-threshold-evaluator.ts
17
+ */
18
+ // ---------------------------------------------------------------------------
19
+ // Public API
20
+ // ---------------------------------------------------------------------------
21
/**
 * Generate a markdown PR comment for a repo-based evaluation.
 *
 * Sections, in order: hidden update-in-place marker, header and summary
 * sentence, overall/cost line, per-task threshold table, collapsible
 * per-dimension table, comparison against the previous run, collapsible
 * per-area score breakdown, footer links, bypass hint, and a timestamp.
 * Sections with no data are omitted.
 *
 * @param options - Inputs for the comment
 * @param options.thresholdEval - Threshold evaluation results
 * @param options.scoreSummary - Score summary from the pipeline
 * @param options.comparison - Comparison report (optional)
 * @param options.reportUrl - Link to the full report in Studio (optional)
 * @param options.promptfooUrl - Link to the Promptfoo results (optional)
 * @param options.firstRun - True when there is no baseline; suppresses the
 *   comparison section
 * @returns The comment body as a newline-joined markdown string
 */
export function generateRepoEvalComment(options) {
  const { thresholdEval, scoreSummary, comparison, reportUrl, promptfooUrl, firstRun } = options;
  const lines = [];

  // Comment marker for update-in-place
  lines.push("<!-- ailf-repo-eval-report -->");

  // Header
  const statusEmoji = thresholdEval.checkPassed ? "✅" : "⚠️";
  lines.push("## 📊 AI Literacy Evaluation");
  lines.push("");

  // Summary line; the threshold is only mentioned when a default gate is configured
  const thresholdNote = thresholdEval.defaultThreshold > 0
    ? ` (threshold: ${thresholdEval.defaultThreshold})`
    : "";
  lines.push(
    `${statusEmoji} Your team's documentation quality score is **${thresholdEval.overallScore}/100**` +
      `${thresholdNote}. ${contextMessage(thresholdEval, comparison, firstRun)}`,
  );
  lines.push("");

  // Cost info: per-area run cost plus grader cost
  const totalCost = scoreSummary.scores.reduce((sum, s) => sum + (s.totalCost ?? 0), 0);
  const graderCost = scoreSummary.overall.cost?.graderTotal ?? 0;
  const combinedCost = totalCost + graderCost;
  const testCount = scoreSummary.scores.reduce((sum, s) => sum + s.testCount, 0);
  const costStr = combinedCost > 0 ? ` · Cost: ${formatCost(combinedCost)}` : "";
  // Only non-negative lifts get an explicit "+"; a negative lift already
  // carries its own sign (previously rendered as "+-3").
  const docLift = Math.round(scoreSummary.overall.avgDocLift);
  const docLiftStr = docLift < 0 ? `${docLift}` : `+${docLift}`;
  lines.push(
    `**Overall:** ${thresholdEval.overallScore}/100 · ` +
      `**Doc lift:** ${docLiftStr} points · ` +
      `${testCount} tests${costStr}`,
  );
  lines.push("");

  // Per-task threshold table
  if (thresholdEval.results.length > 0) {
    lines.push("### Per-Task Results");
    lines.push("");
    lines.push("| Task | Score | Threshold | Status |");
    lines.push("|------|-------|-----------|--------|");
    for (const result of thresholdEval.results) {
      lines.push(`| ${result.taskId} | ${result.actualScore} | ${result.threshold} | ${statusIcon(result)} |`);
    }
    lines.push("");
  }

  // Dimension details (collapsible)
  const withDimensions = thresholdEval.results.filter(
    (r) => r.dimensionResults && r.dimensionResults.length > 0,
  );
  if (withDimensions.length > 0) {
    lines.push("<details>");
    lines.push("<summary>📊 Per-dimension breakdown</summary>");
    lines.push("");
    lines.push("| Task | Dimension | Score | Threshold | Status |");
    lines.push("|------|-----------|-------|-----------|--------|");
    for (const result of withDimensions) {
      for (const dim of result.dimensionResults) {
        lines.push(`| ${result.taskId} | ${dim.dimension} | ${dim.actual} | ${dim.threshold} | ${dim.passed ? "✅" : "❌"} |`);
      }
    }
    lines.push("");
    lines.push("</details>");
    lines.push("");
  }

  // Comparison section (when baseline exists)
  if (comparison && !firstRun) {
    // Arrow, direction, and sign are all derived from the rounded value that
    // is actually displayed, so a +0.4 change reads "0 (unchanged)" instead
    // of the contradictory "+0 (up)".
    const delta = Math.round(comparison.deltas.overall);
    const direction = delta > 0 ? "up" : delta < 0 ? "down" : "unchanged";
    const arrow = delta > 0 ? "📈" : delta < 0 ? "📉" : "➡️";
    lines.push(`${arrow} Compared to last run: overall **${delta > 0 ? "+" : ""}${delta}** points (${direction}).`);
    lines.push("");
    if (comparison.regressed.length > 0) {
      lines.push(`> ⚠️ **Regressions detected:** ${comparison.regressed.map((a) => `\`${a}\``).join(", ")}`);
      lines.push("");
    }
  }

  // Score breakdown (collapsible), highest total score first
  if (scoreSummary.scores.length > 0) {
    lines.push("<details>");
    lines.push("<summary>📋 Full score breakdown</summary>");
    lines.push("");
    lines.push("| Feature Area | Score | Task Completion | Code Correctness | Doc Coverage | Doc Lift |");
    lines.push("|-------------|-------|-----------------|------------------|-------------|----------|");
    // Copy before sorting so the caller's array is not reordered
    const sorted = [...scoreSummary.scores].sort((a, b) => b.totalScore - a.totalScore);
    for (const s of sorted) {
      lines.push(`| ${s.feature} | **${Math.round(s.totalScore)}** | ${Math.round(s.taskCompletion)} | ${Math.round(s.codeCorrectness)} | ${Math.round(s.docCoverage)} | ${liftArrow(s.docLift)} |`);
    }
    lines.push("");
    lines.push("</details>");
    lines.push("");
  }

  // Footer with links and bypass instructions
  const links = [];
  if (reportUrl) {
    links.push(`[View full report in Studio](${reportUrl})`);
  }
  if (promptfooUrl) {
    links.push(`[Detailed results](${promptfooUrl})`);
  }
  if (links.length > 0) {
    lines.push(links.join(" · "));
    lines.push("");
  }
  lines.push(
    "> 💡 This check evaluates **documentation quality**, not code correctness. " +
      "Add the `skip-ailf` label to bypass.",
  );
  lines.push("");

  // Timestamp
  const now = new Date();
  lines.push(`*Generated by [AI Literacy Framework](https://github.com/sanity-labs/ai-literacy-framework) · ${now.toUTCString()}*`);
  return lines.join("\n");
}
124
+ // ---------------------------------------------------------------------------
125
+ // Internal helpers
126
+ // ---------------------------------------------------------------------------
127
/**
 * Pick the explanatory sentence that follows the headline score.
 *
 * Precedence: first run, then a passing check, then a failing check with a
 * comparison available, then a failing check without one.
 *
 * @param eval_ - Threshold evaluation; only `checkPassed` is read, and only
 *   when this is not a first run
 * @param comparison - Comparison report, if any (checked for presence only)
 * @param firstRun - Whether this is the first evaluation
 * @returns One sentence of context for the summary line
 */
function contextMessage(eval_, comparison, firstRun) {
  if (firstRun) {
    return "This is the first evaluation for this task set. Future runs will show trends.";
  }
  if (eval_.checkPassed) {
    return "Documentation quality meets your team's bar.";
  }
  // The check did not pass: wording depends on whether a baseline exists.
  return comparison
    ? "This reflects the current state of your product's documentation, not changes in this PR."
    : "Some tasks are below their configured thresholds. Review the documentation for the affected areas.";
}
141
/**
 * Map a task result's status to its table label.
 *
 * @param result - Task threshold result; only `status` is read
 * @returns The label for "passed", "warning", or "failed"; `undefined` for
 *   any other status value
 */
function statusIcon(result) {
  const { status } = result;
  if (status === "passed") {
    return "✅ Pass";
  }
  if (status === "warning") {
    return "⚠️ Below threshold";
  }
  if (status === "failed") {
    return "❌ Blocked";
  }
  return undefined;
}
151
/**
 * Render a dollar amount for the comment.
 *
 * Exactly zero prints as "$0.00"; amounts below one cent keep four decimals
 * so they do not round away to nothing; everything else uses two decimals.
 *
 * @param cost - Amount in dollars
 * @returns The amount prefixed with "$"
 */
function formatCost(cost) {
  if (cost === 0) {
    return "$0.00";
  }
  const decimals = cost < 0.01 ? 4 : 2;
  return `$${cost.toFixed(decimals)}`;
}
158
/**
 * Render a doc-lift value as an arrow plus the rounded number.
 *
 * @param lift - Doc lift in points (rounded to the nearest integer first)
 * @returns "↑ +n" for a positive rounded lift, "↓ -n" for a negative one,
 *   and "→ 0" otherwise
 */
function liftArrow(lift) {
  const whole = Math.round(lift);
  // Negative values already carry their own minus sign.
  return whole > 0 ? `↑ +${whole}` : whole < 0 ? `↓ ${whole}` : "→ 0";
}
@@ -0,0 +1,89 @@
1
+ /**
2
+ * pipeline/repo-threshold-evaluator.ts
3
+ *
4
+ * Evaluates per-task scores against thresholds configured in repo task
5
+ * definitions (`.ailf/tasks/*.yaml` → `execution.threshold`).
6
+ *
7
+ * This is distinct from the readiness-gate threshold system in
8
+ * `config/thresholds.yaml`. Repo thresholds are per-task, defined by
9
+ * the product team, and drive PR check pass/fail status. Framework
10
+ * thresholds are per-area, defined by the AILF team, and drive
11
+ * readiness reports.
12
+ *
13
+ * @see docs/exec-plans/completed/tasks-as-content/phase-6-pr-quality-gates.md
14
+ * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
15
+ */
16
+ import type { ScoreSummary } from "./types.js";
17
+ /** Threshold configuration from a repo task's execution block; one entry per task, passed to evaluateRepoThresholds */
18
+ export interface TaskThresholdConfig {
19
+ /** Task ID; used to look up this config for a scored task */
20
+ taskId: string;
21
+ /** Whether failing this threshold should block the PR (a failing task is reported as "failed" when true, "warning" otherwise) */
22
+ blocking: boolean;
23
+ /** Minimum overall score (0–100); when omitted, the default threshold applies */
24
+ score?: number;
25
+ /** Per-dimension minimums; only the dimensions that are set are checked, each against the rounded dimension score */
26
+ dimensions?: {
27
+ taskCompletion?: number;
28
+ codeCorrectness?: number;
29
+ docCoverage?: number;
30
+ };
31
+ }
32
+ /** Result of evaluating a single task against its threshold */
33
+ export interface TaskThresholdResult {
34
+ /** Task ID */
35
+ taskId: string;
36
+ /** Feature area ("unknown" for a configured task that had no scores) */
37
+ area: string;
38
+ /** Actual overall score, rounded to an integer (0 for a configured task that had no scores) */
39
+ actualScore: number;
40
+ /** Configured threshold (or framework default) */
41
+ threshold: number;
42
+ /** Whether the score meets or exceeds the threshold and no dimension threshold failed */
43
+ passed: boolean;
44
+ /** Whether this result blocks the PR */
45
+ blocking: boolean;
46
+ /** Per-dimension results (when dimension thresholds are configured) */
47
+ dimensionResults?: DimensionThresholdResult[];
48
+ /** Status classification: "passed" when `passed` is true; otherwise "failed" if blocking, else "warning" */
49
+ status: "passed" | "warning" | "failed";
50
+ }
51
+ /** Result of evaluating a single dimension against its threshold */
52
+ export interface DimensionThresholdResult {
53
+ dimension: string; // "task-completion", "code-correctness", or "doc-coverage"
54
+ actual: number; // dimension score, rounded to an integer
55
+ threshold: number; // configured minimum for this dimension
56
+ passed: boolean; // true when actual >= threshold
57
+ }
58
+ /** Aggregate result of evaluating all tasks */
59
+ export interface RepoThresholdEvaluation {
60
+ /** Whether the overall check passes (no blocking failures, i.e. no result with status "failed") */
61
+ checkPassed: boolean;
62
+ /** Individual task results, sorted failed first, then warnings, then passed */
63
+ results: TaskThresholdResult[];
64
+ /** Summary counts */
65
+ summary: {
66
+ total: number; // number of entries in `results`
67
+ passed: number; // results with status "passed"
68
+ warnings: number; // results with status "warning"
69
+ failed: number; // results with status "failed"
70
+ };
71
+ /** Overall score (average across all evaluated tasks), rounded to an integer; 0 when nothing was evaluated */
72
+ overallScore: number;
73
+ /** Default threshold used when tasks don't specify one */
74
+ defaultThreshold: number;
75
+ }
76
+ /**
77
+ * Evaluate task scores against repo-configured thresholds.
78
+ *
79
+ * For each task with a threshold configured in its `execution` block,
80
+ * compare the actual score against the threshold and determine pass/fail.
81
+ *
82
+ * Tasks without explicit thresholds use the `configDefaultThreshold`
83
+ * (from `.ailf/config.yaml`) or the framework default (0 = no gate).
84
+ *
85
+ * @param scoreSummary - The evaluation score summary
86
+ * @param thresholds - Per-task threshold configs (from repo task definitions)
87
+ * @param configDefaultThreshold - Default threshold from .ailf/config.yaml
+ * @returns Per-task results, summary counts, the overall check status, and the overall score
88
+ */
89
+ export declare function evaluateRepoThresholds(scoreSummary: ScoreSummary, thresholds: TaskThresholdConfig[], configDefaultThreshold?: number): RepoThresholdEvaluation;
@@ -0,0 +1,162 @@
1
+ /**
2
+ * pipeline/repo-threshold-evaluator.ts
3
+ *
4
+ * Evaluates per-task scores against thresholds configured in repo task
5
+ * definitions (`.ailf/tasks/*.yaml` → `execution.threshold`).
6
+ *
7
+ * This is distinct from the readiness-gate threshold system in
8
+ * `config/thresholds.yaml`. Repo thresholds are per-task, defined by
9
+ * the product team, and drive PR check pass/fail status. Framework
10
+ * thresholds are per-area, defined by the AILF team, and drive
11
+ * readiness reports.
12
+ *
13
+ * @see docs/exec-plans/completed/tasks-as-content/phase-6-pr-quality-gates.md
14
+ * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
15
+ */
16
+ // ---------------------------------------------------------------------------
17
+ // Constants
18
+ // ---------------------------------------------------------------------------
19
+ /** Framework default threshold when neither task nor config specifies one (0 = no gate); used as the default for `configDefaultThreshold` */
20
+ const FRAMEWORK_DEFAULT_THRESHOLD = 0;
21
+ // ---------------------------------------------------------------------------
22
+ // Public API
23
+ // ---------------------------------------------------------------------------
24
/**
 * Evaluate task scores against repo-configured thresholds.
 *
 * Every task that has scores is compared against its configured `score`
 * threshold (falling back to `configDefaultThreshold`) and against any
 * per-dimension minimums. Tasks that have a threshold config but no scores
 * are reported as not passing, with a score of 0 and area "unknown".
 *
 * Tasks without explicit thresholds use the `configDefaultThreshold`
 * (from `.ailf/config.yaml`) or the framework default (0 = no gate).
 *
 * @param scoreSummary - The evaluation score summary
 * @param thresholds - Per-task threshold configs (from repo task definitions)
 * @param configDefaultThreshold - Default threshold from .ailf/config.yaml
 * @returns Results sorted failed → warning → passed, summary counts, the
 *   overall check status (passes when nothing has status "failed"), and the
 *   rounded average score of the tasks that were actually evaluated
 */
export function evaluateRepoThresholds(scoreSummary, thresholds, configDefaultThreshold = FRAMEWORK_DEFAULT_THRESHOLD) {
  const thresholdMap = new Map(thresholds.map((t) => [t.taskId, t]));
  // Build a task-to-area mapping from the score summary
  const taskScoreMap = buildTaskScoreMap(scoreSummary);
  const results = [];
  // Rounded scores of tasks that really ran. Tracked separately from
  // `results` so that a genuine score of 0 still counts toward the average,
  // while the placeholder entries for never-evaluated tasks do not.
  const evaluatedScores = [];

  // Evaluate each task that has scores
  for (const [taskId, taskScore] of taskScoreMap) {
    const config = thresholdMap.get(taskId);
    const threshold = config?.score ?? configDefaultThreshold;
    const blocking = config?.blocking ?? false;
    const actualScore = Math.round(taskScore.score);
    // Dimension thresholds are optional; left undefined when none are configured
    const dimensionResults = config?.dimensions
      ? evaluateDimensions(taskScore, config.dimensions)
      : undefined;
    const dimensionsFailed = dimensionResults?.some((d) => !d.passed) ?? false;
    const overallPassed = actualScore >= threshold && !dimensionsFailed;
    evaluatedScores.push(actualScore);
    results.push({
      taskId,
      area: taskScore.area,
      actualScore,
      threshold,
      passed: overallPassed,
      blocking,
      dimensionResults,
      status: overallPassed ? "passed" : blocking ? "failed" : "warning",
    });
  }

  // Also add entries for threshold-configured tasks that weren't evaluated
  for (const config of thresholds) {
    if (!taskScoreMap.has(config.taskId)) {
      results.push({
        taskId: config.taskId,
        area: "unknown",
        actualScore: 0,
        threshold: config.score ?? configDefaultThreshold,
        passed: false,
        blocking: config.blocking,
        status: config.blocking ? "failed" : "warning",
      });
    }
  }

  // Sort: failed first, then warnings, then passed
  const statusOrder = { failed: 0, warning: 1, passed: 2 };
  results.sort((a, b) => statusOrder[a.status] - statusOrder[b.status]);

  const countWithStatus = (status) => results.filter((r) => r.status === status).length;
  const passed = countWithStatus("passed");
  const warnings = countWithStatus("warning");
  const failed = countWithStatus("failed");

  // Check passes if no blocking failures
  const checkPassed = failed === 0;

  // Overall score: rounded mean over evaluated tasks only
  const overallScore = evaluatedScores.length > 0
    ? Math.round(evaluatedScores.reduce((sum, score) => sum + score, 0) / evaluatedScores.length)
    : 0;

  return {
    checkPassed,
    results,
    summary: {
      total: results.length,
      passed,
      warnings,
      failed,
    },
    overallScore,
    defaultThreshold: configDefaultThreshold,
  };
}
110
/**
 * Build a map of task ID → score from the ScoreSummary.
 *
 * ScoreSummary groups scores by feature area rather than by task, so the
 * feature area name doubles as the task ID here and each entry carries the
 * area's composite score. This is a simplification: true per-task scoring
 * would need individual test results, which ScoreSummary does not contain.
 * In practice repo tasks map 1:1 to areas in most cases (each
 * .ailf/tasks/*.yaml file is one area).
 *
 * @param summary - Score summary; only `scores` is read
 * @returns Map keyed by feature area name with the area's total and
 *   per-dimension scores
 */
function buildTaskScoreMap(summary) {
  const entries = summary.scores.map(
    ({ feature, totalScore, taskCompletion, codeCorrectness, docCoverage }) => [
      feature,
      {
        area: feature,
        score: totalScore,
        taskCompletion,
        codeCorrectness,
        docCoverage,
      },
    ],
  );
  return new Map(entries);
}
135
/**
 * Check a task's per-dimension scores against the configured minimums.
 *
 * Only dimensions whose threshold is not `undefined` produce a result.
 * Results come out in a fixed order: task completion, code correctness,
 * doc coverage.
 *
 * @param score - Task score carrying `taskCompletion`, `codeCorrectness`,
 *   and `docCoverage`
 * @param thresholds - Per-dimension minimums (each optional)
 * @returns One result per configured dimension; scores are rounded before
 *   being compared
 */
function evaluateDimensions(score, thresholds) {
  // [property name on score/thresholds, label used in the result]
  const dimensionSpecs = [
    ["taskCompletion", "task-completion"],
    ["codeCorrectness", "code-correctness"],
    ["docCoverage", "doc-coverage"],
  ];
  return dimensionSpecs
    .filter(([key]) => thresholds[key] !== undefined)
    .map(([key, label]) => {
      const actual = Math.round(score[key]);
      const minimum = thresholds[key];
      return {
        dimension: label,
        actual,
        threshold: minimum,
        passed: actual >= minimum,
      };
    });
}