@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530) hide show
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,35 @@
1
+ /**
2
+ * pipeline/resolve-mappings.ts
3
+ *
4
+ * Extracts canonical mappings from inline task YAML definitions.
5
+ * Each task file contains canonical_docs and reference_solution fields
6
+ * directly — there is no separate mappings.yaml file.
7
+ *
8
+ * The output shape matches what downstream consumers expect so
9
+ * fetch-docs, validate, and calculate-scores work without changes.
10
+ */
11
/**
 * One canonical documentation reference attached to a task.
 * Instances are the elements of `CanonicalTask.canonical_docs`.
 */
export interface CanonicalDoc {
    /** Identifier of the referenced document — presumably a docs slug; TODO confirm. */
    slug: string;
    /** Free-text note stored with the slug — presumably why the doc is canonical; TODO confirm. */
    reason: string;
}
15
/**
 * A single task together with its inline canonical mapping.
 * The four fields mirror the same-named fields of a task YAML entry.
 */
export interface CanonicalTask {
    /** Task identifier taken from the YAML entry's `id` field. */
    id: string;
    /** Task description taken from the YAML entry's `description` field. */
    description: string;
    /** Canonical documentation references listed on the task. */
    canonical_docs: Array<CanonicalDoc>;
    /** Value of the entry's `reference_solution` field — presumably a path to a solution file; TODO confirm. */
    reference_solution: string;
}
21
/**
 * Canonical mappings grouped by feature area.
 * This is the shape returned by `extractInlineMappings` and `resolveMappings`.
 */
export interface ResolvedMappings {
    /** Tasks per feature area, keyed by the feature-area name. */
    feature_areas: {
        [featureArea: string]: {
            tasks: CanonicalTask[];
        };
    };
}
26
+ /**
27
+ * Extract inline canonical mappings from the non-hidden .yaml/.yml task files in `<rootDir>/tasks`, one feature area per file, keyed by the file name without its extension.
28
+ * Only tasks with both an id and canonical_docs fields are included; `feature_areas` is empty when the tasks directory does not exist.
29
+ */
30
+ export declare function extractInlineMappings(rootDir: string): ResolvedMappings;
31
+ /**
32
+ * Resolve canonical mappings from inline task YAML definitions; this is the single source of truth — there is no external mappings file.
33
+ * NOTE(review): `rootDir` is presumably the directory that contains `tasks/`, as for extractInlineMappings — confirm against the implementation.
34
+ */
35
+ export declare function resolveMappings(rootDir: string): ResolvedMappings;
@@ -0,0 +1,72 @@
1
+ /**
2
+ * pipeline/resolve-mappings.ts
3
+ *
4
+ * Extracts canonical mappings from inline task YAML definitions.
5
+ * Each task file contains canonical_docs and reference_solution fields
6
+ * directly — there is no separate mappings.yaml file.
7
+ *
8
+ * The output shape matches what downstream consumers expect so
9
+ * fetch-docs, validate, and calculate-scores work without changes.
10
+ */
11
+ import { existsSync, readFileSync, readdirSync } from "fs";
12
+ import { resolve } from "path";
13
+ import { load } from "js-yaml";
14
+ // ---------------------------------------------------------------------------
15
+ // Resolution
16
+ // ---------------------------------------------------------------------------
/**
 * Collect the canonical mappings declared inline in `<rootDir>/tasks`.
 *
 * Every non-hidden `.yaml` / `.yml` file is read in sorted filename order and
 * parsed with js-yaml. Files whose top-level value is not a list are ignored.
 * A list entry is kept only when isInlineTaskWithMappings accepts it, and
 * only its canonical_docs, description, id and reference_solution fields are
 * copied. The filename without its extension becomes the feature-area key;
 * areas with no accepted task are left out.
 *
 * @param rootDir Directory that contains the `tasks` folder
 * @returns `{ feature_areas }`, empty when the tasks folder does not exist
 */
export function extractInlineMappings(rootDir) {
    const mappings = { feature_areas: {} };
    const tasksDir = resolve(rootDir, "tasks");
    if (existsSync(tasksDir)) {
        // Dot-files (e.g. generated ".expanded.yaml") are never task sources.
        const isVisibleYaml = (name) => !name.startsWith(".") && (name.endsWith(".yaml") || name.endsWith(".yml"));
        const taskFiles = readdirSync(tasksDir).filter(isVisibleYaml);
        taskFiles.sort();
        for (const fileName of taskFiles) {
            const document = load(readFileSync(resolve(tasksDir, fileName), "utf-8"));
            if (!Array.isArray(document)) {
                continue;
            }
            const tasks = document
                .filter((candidate) => isInlineTaskWithMappings(candidate))
                .map(({ canonical_docs, description, id, reference_solution }) => ({
                    canonical_docs,
                    description,
                    id,
                    reference_solution,
                }));
            if (tasks.length === 0) {
                continue;
            }
            mappings.feature_areas[fileName.replace(/\.(yaml|yml)$/, "")] = { tasks };
        }
    }
    return mappings;
}
/**
 * Produce the canonical mappings for a package root.
 *
 * Delegates entirely to extractInlineMappings: the inline task YAML is the
 * only source consulted, no external mappings file is read.
 *
 * @param rootDir Directory that contains the `tasks` folder
 * @returns The mappings returned by extractInlineMappings, unchanged
 */
export function resolveMappings(rootDir) {
    const inlineMappings = extractInlineMappings(rootDir);
    return inlineMappings;
}
60
+ // ---------------------------------------------------------------------------
61
+ // Helpers
62
+ // ---------------------------------------------------------------------------
/**
 * Decide whether a parsed YAML list entry carries inline canonical mappings.
 *
 * An entry qualifies when it is a non-null object whose id, description and
 * reference_solution are strings and whose canonical_docs is a non-empty
 * array. The elements of canonical_docs are not inspected.
 */
function isInlineTaskWithMappings(entry) {
    if (entry === null || typeof entry !== "object") {
        return false;
    }
    const { id, description, canonical_docs: docs, reference_solution: solution } = entry;
    if (typeof id !== "string" || typeof description !== "string") {
        return false;
    }
    if (!Array.isArray(docs) || docs.length === 0) {
        return false;
    }
    return typeof solution === "string";
}
@@ -0,0 +1,39 @@
1
+ /**
2
+ * pipeline/retrieval-metrics.ts
3
+ *
4
+ * Computes retrieval precision and recall by comparing agent-retrieved
5
+ * doc slugs against canonical_docs defined in task YAML files.
6
+ *
7
+ * This is a pure computation module — no file I/O beyond reading task YAMLs.
8
+ */
9
+ import type { RetrievalMetrics, TaskRetrievalMetrics } from "./types.js";
10
+ export interface AgenticBehaviorData {
11
+ /** Feature area (from detectFeatureArea) */
12
+ area: string;
13
+ /** Test description (used to match tasks) */
14
+ description: string;
15
+ /** Doc slugs the agent actually visited */
16
+ docSlugsVisited: string[];
17
+ /** Task ID (extracted from description if possible) */
18
+ taskId?: string;
19
+ }
20
+ /**
21
+ * Compute retrieval metrics from agentic behavior data.
22
+ *
23
+ * @param rootDir Package root (to find tasks/*.yaml)
24
+ * @param behaviors Agentic behavior data per task
25
+ * @returns RetrievalMetrics, or null if there are no canonical docs, no behavior data, or no behavior whose taskId matches a task with canonical docs
26
+ */
27
+ export declare function computeRetrievalMetrics(rootDir: string, behaviors: AgenticBehaviorData[]): null | RetrievalMetrics;
28
+ /**
29
+ * Compute retrieval metrics for a single task.
30
+ */
31
+ export declare function computeTaskMetrics(taskId: string, area: string, retrieved: string[], canonical: Set<string>): TaskRetrievalMetrics;
32
+ /**
33
+ * Load canonical_docs from all task YAML files.
34
+ * Returns a map of taskId → { slugs: Set<string>, area: string }.
35
+ */
36
+ export declare function loadCanonicalDocs(rootDir: string): Map<string, {
37
+ area: string;
38
+ slugs: Set<string>;
39
+ }>;
@@ -0,0 +1,136 @@
1
+ /**
2
+ * pipeline/retrieval-metrics.ts
3
+ *
4
+ * Computes retrieval precision and recall by comparing agent-retrieved
5
+ * doc slugs against canonical_docs defined in task YAML files.
6
+ *
7
+ * This is a pure computation module — no file I/O beyond reading task YAMLs.
8
+ */
9
+ import { existsSync, readFileSync, readdirSync } from "fs";
10
+ import { resolve } from "path";
11
+ import { load } from "js-yaml";
/**
 * Aggregate retrieval precision / recall / F1 for a batch of agent runs.
 *
 * Behaviors without a taskId, or whose taskId has no canonical docs, are
 * ignored. The remaining per-task metrics are grouped by the area recorded
 * in the canonical map, averaged per area, and the overall figures are the
 * mean of the per-area averages (not of the individual tasks).
 * "Invisible" docs are expected slugs that no task in scope retrieved.
 *
 * @param rootDir Package root (to find tasks/*.yaml)
 * @param behaviors Agentic behavior data per task
 * @returns RetrievalMetrics, or null when there are no canonical docs, no
 *          behaviors, or no behavior that matches a known task
 */
export function computeRetrievalMetrics(rootDir, behaviors) {
    const canonicalByTask = loadCanonicalDocs(rootDir);
    if (canonicalByTask.size === 0 || behaviors.length === 0) {
        return null;
    }
    // Left-to-right sum of one numeric field, starting from 0.
    const sumOf = (items, pick) => {
        let total = 0;
        for (const item of items) {
            total += pick(item);
        }
        return total;
    };
    // Sorted expected slugs that appear in none of the retrieved lists.
    const neverRetrieved = (metrics) => {
        const seen = new Set();
        const wanted = new Set();
        for (const metric of metrics) {
            for (const slug of metric.retrieved) {
                seen.add(slug);
            }
            for (const slug of metric.expected) {
                wanted.add(slug);
            }
        }
        return [...wanted].filter((slug) => !seen.has(slug)).sort();
    };
    // Per-task metrics for every behavior that maps to a known task.
    const perTask = [];
    for (const behavior of behaviors) {
        const taskId = behavior.taskId;
        const canonical = taskId ? canonicalByTask.get(taskId) : undefined;
        if (canonical) {
            perTask.push(computeTaskMetrics(taskId, canonical.area, behavior.docSlugsVisited, canonical.slugs));
        }
    }
    if (perTask.length === 0) {
        return null;
    }
    // Bucket by area, keeping first-seen task order inside each bucket.
    const grouped = new Map();
    for (const metric of perTask) {
        const bucket = grouped.get(metric.area);
        if (bucket) {
            bucket.push(metric);
        }
        else {
            grouped.set(metric.area, [metric]);
        }
    }
    const areas = Array.from(grouped)
        .sort((left, right) => left[0].localeCompare(right[0]))
        .map(([area, tasks]) => {
            const avgPrecision = sumOf(tasks, (t) => t.precision) / tasks.length;
            const avgRecall = sumOf(tasks, (t) => t.recall) / tasks.length;
            const avgF1 = sumOf(tasks, (t) => t.f1) / tasks.length;
            return {
                area,
                avgF1,
                avgPrecision,
                avgRecall,
                invisibleDocs: neverRetrieved(tasks),
                taskCount: tasks.length,
                tasks,
            };
        });
    const areaCount = areas.length || 1;
    const overall = {
        avgF1: sumOf(areas, (a) => a.avgF1) / areaCount,
        avgPrecision: sumOf(areas, (a) => a.avgPrecision) / areaCount,
        avgRecall: sumOf(areas, (a) => a.avgRecall) / areaCount,
    };
    return {
        areas,
        globalInvisibleDocs: neverRetrieved(perTask),
        overall,
    };
}
/**
 * Score one task's retrieval against its canonical doc set.
 *
 * Duplicate retrieved slugs are counted once. Precision is hits over unique
 * retrieved slugs, recall is hits over canonical slugs, and each is 0 when
 * its denominator is 0; F1 is 0 when both are 0.
 *
 * @param taskId Task identifier, echoed in the result
 * @param area Feature area, echoed in the result
 * @param retrieved Slugs the agent visited (may contain duplicates)
 * @param canonical Set of expected slugs
 */
export function computeTaskMetrics(taskId, area, retrieved, canonical) {
    const seen = new Set(retrieved);
    const uniqueRetrieved = [...seen];
    // Split the unique retrieved slugs into expected and unexpected in one pass.
    const hits = [];
    const unexpected = [];
    for (const slug of uniqueRetrieved) {
        (canonical.has(slug) ? hits : unexpected).push(slug);
    }
    const expected = [...canonical];
    const missed = expected.filter((slug) => !seen.has(slug));
    let precision = 0;
    if (seen.size > 0) {
        precision = hits.length / seen.size;
    }
    let recall = 0;
    if (canonical.size > 0) {
        recall = hits.length / canonical.size;
    }
    let f1 = 0;
    if (precision + recall > 0) {
        f1 = (2 * precision * recall) / (precision + recall);
    }
    return {
        area,
        expected,
        f1,
        hits,
        missed,
        precision,
        recall,
        retrieved: uniqueRetrieved,
        taskId,
        unexpected,
    };
}
/**
 * Load canonical_docs from all task YAML files.
 * Returns a map of taskId → { slugs: Set<string>, area: string }.
 *
 * Non-hidden `.yaml` / `.yml` files under `<rootDir>/tasks` are read in
 * sorted filename order, so when the same task id appears in several files
 * the alphabetically last file wins deterministically. The area is the
 * filename without its extension.
 *
 * Malformed content is skipped instead of crashing the caller: list entries
 * that are not objects, entries without a truthy id, a canonical_docs value
 * that is not an array, and docs without a string slug are all ignored. A
 * task is recorded only if at least one valid slug remains.
 *
 * @param rootDir Package root (to find tasks/*.yaml)
 * @returns Map keyed by task id; empty when the tasks folder is missing
 */
export function loadCanonicalDocs(rootDir) {
    const result = new Map();
    const tasksDir = resolve(rootDir, "tasks");
    if (!existsSync(tasksDir)) {
        return result;
    }
    const files = readdirSync(tasksDir)
        .filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."))
        .sort();
    for (const file of files) {
        const area = file.replace(/\.ya?ml$/, "");
        const parsed = load(readFileSync(resolve(tasksDir, file), "utf-8"));
        if (!Array.isArray(parsed)) {
            continue;
        }
        for (const entry of parsed) {
            // YAML lists may contain nulls or scalars; only mappings can be tasks.
            if (entry === null || typeof entry !== "object") {
                continue;
            }
            const { id, canonical_docs: docs } = entry;
            if (!id || !Array.isArray(docs)) {
                continue;
            }
            const slugs = new Set();
            for (const doc of docs) {
                // A doc without a string slug must not inflate the expected set.
                if (doc !== null && typeof doc === "object" && typeof doc.slug === "string") {
                    slugs.add(doc.slug);
                }
            }
            if (slugs.size > 0) {
                result.set(id, { area, slugs });
            }
        }
    }
    return result;
}
@@ -0,0 +1,67 @@
1
+ /**
2
+ * pipeline/reverse-mapping.ts
3
+ *
4
+ * Builds a reverse index from document slugs to feature areas.
5
+ *
6
+ * The canonical_docs in each task YAML define which docs are relevant to
7
+ * each task. This module inverts that mapping: given a document slug,
8
+ * return which feature areas (and specific tasks) would be affected if
9
+ * that document changed.
10
+ *
11
+ * This is the foundation for event-driven evaluation triggers — when a
12
+ * content webhook fires for a document, we look up affected areas and
13
+ * dispatch a scoped evaluation.
14
+ *
15
+ * @see docs/design-docs/report-store/visibility-workflows.md — Event-Driven Triggers
16
+ */
17
+ import { type ResolvedMappings } from "./resolve-mappings.js";
18
+ /** A feature area + task affected by a document change */
19
+ export interface AffectedTarget {
20
+ /** Feature area name (filename stem, e.g., "groq") */
21
+ area: string;
22
+ /** Task ID within the area (e.g., "groq-blog-queries") */
23
+ taskId: string;
24
+ }
25
+ /** Summary of impact for a set of changed slugs */
26
+ export interface ImpactSummary {
27
+ /** Unique affected area names (for --area flag) */
28
+ areas: string[];
29
+ /** All affected targets with area + task detail */
30
+ targets: AffectedTarget[];
31
+ /** Unique affected task IDs (for --task flag) */
32
+ taskIds: string[];
33
+ /** Slugs that don't match any canonical docs (untracked) */
34
+ unmatchedSlugs: string[];
35
+ }
36
+ /** The reverse mapping: document slug → affected areas/tasks */
37
+ export type ReverseMapping = Map<string, AffectedTarget[]>;
38
+ /**
39
+ * Get all unique canonical doc slugs across all tasks.
40
+ * Useful for checking if a webhook payload matches any tracked document.
41
+ */
42
+ export declare function allTrackedSlugs(reverseMapping: ReverseMapping): string[];
43
+ /**
44
+ * Given a set of changed document slugs, determine which areas and tasks
45
+ * are affected and should be re-evaluated.
46
+ *
47
+ * @param slugs - Document slugs that changed
48
+ * @param reverseMapping - Pre-built reverse mapping (or will build from rootDir)
49
+ * @returns Impact summary with deduplicated areas and tasks
50
+ */
51
+ export declare function assessImpact(slugs: string[], reverseMapping: ReverseMapping): ImpactSummary;
52
+ /**
53
+ * Build a reverse mapping from document slugs to feature areas.
54
+ *
55
+ * Reads all task YAML files and inverts the canonical_docs mappings.
56
+ * A single slug may map to multiple areas/tasks (e.g., "groq-introduction"
57
+ * is referenced by multiple GROQ tasks).
58
+ *
59
+ * @param rootDir - Path to the eval package root (packages/eval)
60
+ * @returns Map from slug → array of affected targets
61
+ */
62
+ export declare function buildReverseMapping(rootDir: string): ReverseMapping;
63
+ /**
64
+ * Build a reverse mapping from already-resolved mappings.
65
+ * Useful for testing without filesystem access.
66
+ */
67
+ export declare function buildReverseMappingFromResolved(mappings: ResolvedMappings): ReverseMapping;
@@ -0,0 +1,88 @@
1
+ /**
2
+ * pipeline/reverse-mapping.ts
3
+ *
4
+ * Builds a reverse index from document slugs to feature areas.
5
+ *
6
+ * The canonical_docs in each task YAML define which docs are relevant to
7
+ * each task. This module inverts that mapping: given a document slug,
8
+ * return which feature areas (and specific tasks) would be affected if
9
+ * that document changed.
10
+ *
11
+ * This is the foundation for event-driven evaluation triggers — when a
12
+ * content webhook fires for a document, we look up affected areas and
13
+ * dispatch a scoped evaluation.
14
+ *
15
+ * @see docs/design-docs/report-store/visibility-workflows.md — Event-Driven Triggers
16
+ */
17
+ import { resolveMappings } from "./resolve-mappings.js";
18
+ // ---------------------------------------------------------------------------
19
+ // Public API
20
+ // ---------------------------------------------------------------------------
/**
 * List every document slug that has at least one entry in the reverse
 * mapping, in default (code-unit) sort order.
 * Handy for testing whether a webhook payload touches a tracked document.
 */
export function allTrackedSlugs(reverseMapping) {
    const slugs = Array.from(reverseMapping.keys());
    slugs.sort();
    return slugs;
}
/**
 * Given a set of changed document slugs, determine which areas and tasks
 * are affected and should be re-evaluated.
 *
 * Every list in the result is deduplicated: a slug that is repeated in the
 * input, or several slugs that point at the same area/task pair, contribute
 * a single entry. `targets` and `unmatchedSlugs` keep first-seen order;
 * `areas` and `taskIds` are sorted.
 *
 * @param slugs - Document slugs that changed
 * @param reverseMapping - Pre-built reverse mapping (slug → affected targets)
 * @returns Impact summary with deduplicated areas, tasks, targets and
 *          unmatched slugs
 */
export function assessImpact(slugs, reverseMapping) {
    const targetsByKey = new Map();
    const unmatched = new Set();
    for (const slug of slugs) {
        const targets = reverseMapping.get(slug);
        if (!targets || targets.length === 0) {
            unmatched.add(slug);
            continue;
        }
        for (const target of targets) {
            // JSON-encode the pair so distinct (area, taskId) pairs never collide.
            const key = JSON.stringify([target.area, target.taskId]);
            if (!targetsByKey.has(key)) {
                targetsByKey.set(key, target);
            }
        }
    }
    const allTargets = [...targetsByKey.values()];
    const areas = [...new Set(allTargets.map((t) => t.area))].sort();
    const taskIds = [...new Set(allTargets.map((t) => t.taskId))].sort();
    return {
        areas,
        targets: allTargets,
        taskIds,
        unmatchedSlugs: [...unmatched],
    };
}
/**
 * Build the slug → affected-targets index straight from the task YAML files.
 *
 * Resolves the mappings for `rootDir` via resolveMappings and inverts them
 * with buildReverseMappingFromResolved. One slug can point at several
 * area/task pairs when more than one task lists it.
 *
 * @param rootDir - Root directory handed to resolveMappings
 * @returns Map from slug → array of affected targets
 */
export function buildReverseMapping(rootDir) {
    return buildReverseMappingFromResolved(resolveMappings(rootDir));
}
/**
 * Invert already-resolved mappings into a slug → targets index.
 *
 * Walks every feature area, task and canonical doc in iteration order and
 * appends `{ area, taskId }` to the list stored under the doc's slug. Works
 * purely on the object passed in, so no filesystem access is needed.
 */
export function buildReverseMappingFromResolved(mappings) {
    const slugIndex = new Map();
    for (const [area, { tasks }] of Object.entries(mappings.feature_areas)) {
        for (const { id: taskId, canonical_docs: docs } of tasks) {
            for (const { slug } of docs) {
                let targets = slugIndex.get(slug);
                if (targets === undefined || targets === null) {
                    // First time this slug is seen: start its target list.
                    targets = [];
                    slugIndex.set(slug, targets);
                }
                targets.push({ area, taskId });
            }
        }
    }
    return slugIndex;
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * pipeline/schemas.ts — Re-export barrel
3
+ *
4
+ * All pipeline Zod schemas now live in @sanity/ailf-core. This file
5
+ * re-exports them for backward compatibility.
6
+ *
7
+ * @see packages/core/src/schemas/pipeline.ts (canonical source)
8
+ */
9
+ export * from "../_vendor/ailf-core/index.d.ts";
@@ -0,0 +1,9 @@
1
+ /**
2
+ * pipeline/schemas.ts — Re-export barrel
3
+ *
4
+ * All pipeline Zod schemas now live in @sanity/ailf-core. This file
5
+ * re-exports them for backward compatibility.
6
+ *
7
+ * @see packages/core/src/schemas/pipeline.ts (canonical source)
8
+ */
9
+ export * from "../_vendor/ailf-core/index.js";
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Pipeline step: Calculate AI Literacy Scores from eval results.
3
+ *
4
+ * Preconditions: eval-results.json exists and is valid
5
+ * Postconditions: score-summary.json exists and is valid
6
+ *
7
+ * Cache key: eval results JSON file(s)
8
+ * Cache outputs: results/latest/score-summary.json
9
+ */
10
+ import type { EvalMode, StepResult } from "../types.js";
11
+ export declare function runCalculateScores(source?: string, mode?: EvalMode, noCache?: boolean): StepResult;
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Pipeline step: Calculate AI Literacy Scores from eval results.
3
+ *
4
+ * Preconditions: eval-results.json exists and is valid
5
+ * Postconditions: score-summary.json exists and is valid
6
+ *
7
+ * Cache key: eval results JSON file(s)
8
+ * Cache outputs: results/latest/score-summary.json
9
+ */
10
+ import { execSync } from "child_process";
11
+ import { dirname, resolve } from "path";
12
+ import { fileURLToPath } from "url";
13
+ import { getStepInputPaths, hashFiles, lookupCache, recordCache, } from "../cache.js";
14
+ import { checkResultsExist, checkScoreSummaryValid } from "../checks.js";
15
+ import { RESULTS_FILES } from "./eval-step.js";
const __dirname = dirname(fileURLToPath(import.meta.url));
// Three directory levels above this module; used as the working directory of
// the child process and as the root for result and cache lookups.
const ROOT = resolve(__dirname, "..", "..", "..");
/**
 * Run the calculate-scores pipeline step.
 *
 * Flow: check that the results file for the mode exists, return early on a
 * cache hit (unless `noCache`), run `tsx src/lib/calculate-scores.ts` in
 * ROOT, verify the score summary, then record the cache entry.
 *
 * The child exiting with code 1 is treated as a normal outcome ("areas below
 * critical"). Any other failure — a different exit code, or an error that
 * carries no numeric exit code at all — fails the step.
 *
 * @param source  Optional value forwarded as `--source <source>`
 * @param mode    Eval mode; "full" uses the baseline results file as input
 * @param noCache When true, skip both the cache lookup and the cache record
 * @returns StepResult with status "success" or "failed"
 */
export function runCalculateScores(source, mode = "baseline", noCache = false) {
    const start = Date.now();
    // For full mode, use the baseline results file as the primary input
    // (calculate-scores reads all available results files internally)
    const primaryMode = mode === "full" ? "baseline" : mode;
    const resultsFile = RESULTS_FILES[primaryMode];
    const resultsIssues = checkResultsExist(ROOT, resultsFile);
    const resultsErrors = resultsIssues.filter((i) => i.severity === "error");
    if (resultsErrors.length > 0) {
        return {
            durationMs: Date.now() - start,
            error: `Results missing: ${resultsErrors.map((e) => e.message).join("; ")}`,
            status: "failed",
        };
    }
    // Cache check
    if (!noCache) {
        const cacheResult = lookupCache(ROOT, "calculate-scores");
        if (cacheResult.hit) {
            return {
                durationMs: Date.now() - start,
                status: "success",
                summary: `Skipped (cached) — ${cacheResult.entry.summary}`,
            };
        }
    }
    // Execute — note: calculate-scores exits 1 when areas are below critical,
    // which is expected behavior, not an error
    try {
        // NOTE(review): `source` and `resultsFile` are interpolated into a shell
        // command line unescaped — callers must only pass trusted values.
        const sourceArg = source ? ` --source ${source}` : "";
        const resultsArg = primaryMode !== "baseline" ? ` ${resultsFile}` : "";
        execSync(`tsx src/lib/calculate-scores.ts${resultsArg}${sourceArg}`, {
            cwd: ROOT,
            env: process.env,
            stdio: "inherit",
        });
    }
    catch (err) {
        // execSync reports the child's exit code as `status`. Errors without it
        // did not come from a process exit and must not be mistaken for the
        // expected exit code 1.
        const code = err !== null && typeof err === "object" && "status" in err
            ? err.status
            : undefined;
        // Exit code 1 means "areas below critical" — that's expected
        if (code !== 1) {
            const detail = typeof code === "number"
                ? `exit code ${code}`
                : err instanceof Error
                    ? err.message
                    : String(err);
            return {
                durationMs: Date.now() - start,
                error: `calculate-scores failed with ${detail}`,
                status: "failed",
            };
        }
    }
    // Postcondition: score summary exists and is valid
    const summaryIssues = checkScoreSummaryValid(ROOT);
    const summaryErrors = summaryIssues.filter((i) => i.severity === "error");
    if (summaryErrors.length > 0) {
        return {
            durationMs: Date.now() - start,
            error: `Postcondition failed: ${summaryErrors.map((e) => e.message).join("; ")}`,
            status: "failed",
        };
    }
    const durationMs = Date.now() - start;
    const summary = "Scores calculated and summary written";
    // Record cache
    if (!noCache) {
        const inputPaths = getStepInputPaths(ROOT, "calculate-scores");
        const inputHash = hashFiles(inputPaths);
        recordCache(ROOT, "calculate-scores", inputHash, summary, durationMs, [
            "results/latest/score-summary.json",
        ]);
    }
    return { durationMs, status: "success", summary };
}
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Pipeline step: Compare current scores against a baseline.
3
+ *
4
+ * Preconditions: score-summary.json exists
5
+ * Postconditions: comparison-report.json written to results/latest/
6
+ *
7
+ * This step is optional — it only runs when --compare is passed
8
+ * (or a baseline exists and auto-compare is enabled).
9
+ */
10
+ import type { CompareOptions, StepResult } from "../types.js";
11
+ /**
12
+ * Run comparison against a baseline.
13
+ *
14
+ * @param rootDir Package root directory
15
+ * @param baselinePath Explicit baseline file path (optional — uses latest if omitted)
16
+ * @param options Compare options (noise threshold, etc.)
17
+ */
18
+ export declare function runCompare(rootDir: string, baselinePath?: string, options?: CompareOptions): StepResult;
@@ -0,0 +1,90 @@
1
+ /**
2
+ * Pipeline step: Compare current scores against a baseline.
3
+ *
4
+ * Preconditions: score-summary.json exists
5
+ * Postconditions: comparison-report.json written to results/latest/
6
+ *
7
+ * This step is optional — it only runs when --compare is passed
8
+ * (or a baseline exists and auto-compare is enabled).
9
+ */
10
+ import { existsSync, readFileSync, readdirSync, writeFileSync } from "fs";
11
+ import { join, resolve } from "path";
12
+ import { compare } from "../compare.js";
/**
 * Run comparison against a baseline.
 *
 * Reads `<rootDir>/results/latest/score-summary.json` as the experiment and
 * compares it with a baseline. Without an explicit `baselinePath`, the
 * `.json` file in `<rootDir>/results/baselines` whose name sorts last is
 * used. The report is written to
 * `<rootDir>/results/latest/comparison-report.json`.
 *
 * Unreadable or unparsable JSON in either input is reported as a "failed"
 * result instead of throwing.
 *
 * @param rootDir      Package root directory
 * @param baselinePath Explicit baseline file path (optional — uses latest if omitted)
 * @param options      Compare options (noise threshold, etc.)
 * @returns "failed" when an input is missing or invalid, "skipped" when no
 *          baseline is available, otherwise "success" with a one-line summary
 */
export function runCompare(rootDir, baselinePath, options) {
    const start = Date.now();
    const fail = (error) => ({
        durationMs: Date.now() - start,
        error,
        status: "failed",
    });
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    const scoreSummaryPath = resolve(rootDir, "results", "latest", "score-summary.json");
    if (!existsSync(scoreSummaryPath)) {
        return fail("score-summary.json not found. Run calculate-scores first.");
    }
    // Load experiment (current run)
    let experiment;
    try {
        experiment = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
    }
    catch (err) {
        return fail(`Could not read score-summary.json: ${describe(err)}`);
    }
    // Resolve baseline
    let resolvedBaselinePath;
    if (baselinePath) {
        resolvedBaselinePath = resolve(baselinePath);
    }
    else {
        const baselinesDir = resolve(rootDir, "results", "baselines");
        if (!existsSync(baselinesDir)) {
            return {
                reason: "No baselines directory found. Run 'pnpm baseline:save' first.",
                status: "skipped",
            };
        }
        // Reverse-sorted so the lexicographically last filename comes first.
        const files = readdirSync(baselinesDir)
            .filter((f) => f.endsWith(".json"))
            .sort()
            .reverse();
        if (files.length === 0) {
            return {
                reason: "No baseline files found. Run 'pnpm baseline:save' first.",
                status: "skipped",
            };
        }
        resolvedBaselinePath = join(baselinesDir, files[0]);
    }
    if (!existsSync(resolvedBaselinePath)) {
        return fail(`Baseline file not found: ${resolvedBaselinePath}`);
    }
    let baseline;
    try {
        baseline = JSON.parse(readFileSync(resolvedBaselinePath, "utf-8"));
    }
    catch (err) {
        return fail(`Could not read baseline ${resolvedBaselinePath}: ${describe(err)}`);
    }
    // Run comparison
    const report = compare(baseline, experiment, options);
    // Write report
    const reportPath = resolve(rootDir, "results", "latest", "comparison-report.json");
    writeFileSync(reportPath, JSON.stringify(report, null, 2));
    // Build summary
    const improved = report.improved.length;
    const regressed = report.regressed.length;
    const unchanged = report.unchanged.length;
    const overallDelta = report.deltas.overall;
    const deltaStr = overallDelta > 0
        ? `+${Math.round(overallDelta)}`
        : String(Math.round(overallDelta));
    const parts = [`Overall: ${deltaStr}`];
    if (improved > 0)
        parts.push(`${improved} improved`);
    if (regressed > 0)
        parts.push(`${regressed} regressed`);
    if (unchanged > 0)
        parts.push(`${unchanged} unchanged`);
    return {
        durationMs: Date.now() - start,
        status: "success",
        summary: parts.join(", "),
    };
}