@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530) hide show
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,203 @@
1
+ /**
2
+ * webhook/handler.ts
3
+ *
4
+ * Platform-agnostic webhook handler for Sanity content change events.
5
+ *
6
+ * Receives Sanity webhook payloads, determines which evaluation areas
7
+ * are affected by the document change, debounces rapid edits, enforces
8
+ * daily budget limits, and dispatches scoped evaluations via GitHub Actions.
9
+ *
10
+ * This handler persists nothing between requests: debounce and budget state
11
+ * is held only in memory by the WebhookHandler instance. It can be
12
+ * mounted in any HTTP framework: Express, Hono, Cloudflare Workers, etc.
13
+ *
14
+ * Flow:
15
+ * 1. Receive Sanity webhook payload
16
+ * 2. Extract document slug from payload
17
+ * 3. Look up affected areas via reverse mapping
18
+ * 4. If no areas affected → ignore (untracked document)
19
+ * 5. Check daily budget → rate-limit if exceeded
20
+ * 6. Push slug into debounce window
21
+ * 7. When debounce window closes → dispatch scoped eval via GitHub Actions
22
+ *
23
+ * @see docs/design-docs/report-store/visibility-workflows.md
24
+ */
25
+ import { assessImpact, buildReverseMapping, } from "../pipeline/reverse-mapping.js";
26
+ import { createBudgetTracker } from "./budget.js";
27
+ import { createDebouncer } from "./debounce.js";
28
+ import { dispatchEvaluation } from "./dispatch.js";
29
+ // ---------------------------------------------------------------------------
30
+ // Defaults
31
+ // ---------------------------------------------------------------------------
32
+ const DEFAULT_DEBOUNCE_MS = 300_000; // 5 minutes
33
+ const DEFAULT_DAILY_BUDGET = 20;
34
+ const DEFAULT_REPO = "sanity-labs/ai-literacy-framework";
35
+ // ---------------------------------------------------------------------------
36
+ // WebhookHandler
37
+ // ---------------------------------------------------------------------------
38
+ /**
39
+ * A stateful webhook handler that manages debouncing, budgeting,
40
+ * and evaluation dispatch.
41
+ *
42
+ * Create one instance per process and call `handle()` for each
43
+ * incoming webhook payload.
44
+ *
45
+ * ```ts
46
+ * const handler = new WebhookHandler({
47
+ * githubToken: process.env.GITHUB_TOKEN!,
48
+ * rootDir: "/path/to/packages/eval",
49
+ * })
50
+ *
51
+ * // In your HTTP handler:
52
+ * app.post("/webhook", async (req) => {
53
+ * const result = await handler.handle(req.body)
54
+ * return Response.json(result)
55
+ * })
56
+ * ```
57
+ */
58
+ export class WebhookHandler {
59
+ budget;
60
+ config;
61
+ debouncer;
62
+ /** Recent dispatch results (for diagnostics); trimmed to the last 50 entries after each dispatch */
63
+ recentDispatches = [];
64
+ reverseMapping;
65
+ constructor(config) {
66
+ this.config = {
67
+ dailyBudget: config.dailyBudget ?? DEFAULT_DAILY_BUDGET,
68
+ debounceMs: config.debounceMs ?? DEFAULT_DEBOUNCE_MS,
69
+ githubRepo: config.githubRepo ?? DEFAULT_REPO,
70
+ githubToken: config.githubToken,
71
+ rootDir: config.rootDir ?? process.cwd(),
72
+ webhookSecret: config.webhookSecret,
73
+ };
74
+ this.reverseMapping = buildReverseMapping(this.config.rootDir);
75
+ this.budget = createBudgetTracker(this.config.dailyBudget);
76
+ this.debouncer = createDebouncer(this.config.debounceMs, this.onDebounceFlush.bind(this));
77
+ }
78
+ /**
79
+ * Get handler diagnostics (for health check endpoints): budget usage, pending slugs, the 10 most recent dispatches, and the reverse-mapping size.
80
+ */
81
+ diagnostics() {
82
+ const budgetState = this.budget.currentState();
83
+ return {
84
+ budget: {
85
+ count: budgetState.count,
86
+ limit: budgetState.limit,
87
+ remaining: this.budget.remaining(),
88
+ },
89
+ pendingSlugs: this.debouncer.pending(),
90
+ recentDispatches: this.recentDispatches.slice(-10),
91
+ trackedSlugs: this.reverseMapping.size,
92
+ };
93
+ }
94
+ /**
95
+ * Handle an incoming Sanity webhook payload.
96
+ *
97
+ * This is the main entry point — call once per webhook request.
98
+ * Returns synchronously with one of three statuses: "ignored", "rate-limited",
99
+ * or "debounced". The actual dispatch happens later, when the debounce window closes.
100
+ */
101
+ handle(payload) {
102
+ // Extract the document slug from the payload
103
+ const slug = extractSlug(payload);
104
+ if (!slug) {
105
+ return {
106
+ reason: "No document slug in payload",
107
+ status: "ignored",
108
+ };
109
+ }
110
+ // Look up affected areas
111
+ const impact = assessImpact([slug], this.reverseMapping);
112
+ if (impact.areas.length === 0) {
113
+ return {
114
+ reason: `Document "${slug}" is not tracked by any evaluation task`,
115
+ status: "ignored",
116
+ };
117
+ }
118
+ // Check daily budget
119
+ if (!this.budget.canDispatch()) {
120
+ const state = this.budget.currentState();
121
+ return {
122
+ dailyBudget: state.limit,
123
+ dailyCount: state.count,
124
+ status: "rate-limited",
125
+ };
126
+ }
127
+ // Push into debounce window. NOTE(review): the expiry reported below assumes the window ends debounceMs from now — confirm against the debouncer's push() semantics
128
+ this.debouncer.push(slug);
129
+ const expiresAt = new Date(Date.now() + this.config.debounceMs).toISOString();
130
+ return {
131
+ pendingSlugs: [...this.debouncer.state().slugs],
132
+ status: "debounced",
133
+ windowExpiresAt: expiresAt,
134
+ };
135
+ }
136
+ /**
137
+ * Force-flush the debounce window (for graceful shutdown).
138
+ */
139
+ async shutdown() {
140
+ await this.debouncer.flush();
141
+ }
142
+ // -------------------------------------------------------------------------
143
+ // Private
144
+ // -------------------------------------------------------------------------
145
+ /**
146
+ * Called when the debounce window closes — dispatches a scoped evaluation.
147
+ */
148
+ async onDebounceFlush(slugs) {
149
+ // Re-check budget (may have been exhausted during debounce window)
150
+ if (!this.budget.canDispatch()) {
151
+ console.warn(` ⚠️ Budget exhausted during debounce window. Skipping dispatch for: ${slugs.join(", ")}`);
152
+ return;
153
+ }
154
+ const impact = assessImpact(slugs, this.reverseMapping);
155
+ if (impact.areas.length === 0)
156
+ return;
157
+ console.log(` 📤 Dispatching evaluation for areas [${impact.areas.join(", ")}] ` +
158
+ `triggered by document changes: ${slugs.join(", ")}`);
159
+ const result = await dispatchEvaluation({
160
+ areas: impact.areas,
161
+ documentSlug: slugs.join(","),
162
+ taskIds: impact.taskIds,
163
+ }, {
164
+ githubToken: this.config.githubToken,
165
+ repo: this.config.githubRepo,
166
+ });
167
+ // Record dispatch against the daily budget (counted whether or not the dispatch succeeded)
168
+ this.budget.record();
169
+ this.recentDispatches.push({
170
+ areas: impact.areas,
171
+ result,
172
+ timestamp: new Date().toISOString(),
173
+ });
174
+ // Keep only last 50 dispatches
175
+ if (this.recentDispatches.length > 50) {
176
+ this.recentDispatches.splice(0, this.recentDispatches.length - 50);
177
+ }
178
+ if (result.ok) {
179
+ console.log(` ✅ Dispatch accepted (HTTP ${result.httpStatus})`);
180
+ }
181
+ else {
182
+ console.warn(` ⚠️ Dispatch failed: ${result.error}`);
183
+ }
184
+ }
185
+ }
186
+ // ---------------------------------------------------------------------------
187
+ // Helpers
188
+ // ---------------------------------------------------------------------------
189
+ /**
190
+ * Extract the document slug from a Sanity webhook payload.
191
+ *
192
+ * Sanity webhooks include the document in `result` — the slug is at
193
+ * `result.slug.current` for article documents. Returns undefined when that field is missing or empty.
194
+ */
195
+ function extractSlug(payload) {
196
+ // Direct slug from projected document
197
+ if (payload.result?.slug?.current) {
198
+ return payload.result.slug.current;
199
+ }
200
+ // No fallback is implemented: deriving a slug from _id (e.g., "article-groq-introduction")
201
+ // is not attempted, so payloads whose projection omits slug yield undefined
202
+ return undefined;
203
+ }
@@ -0,0 +1,17 @@
1
+ /**
2
+ * webhook/index.ts
3
+ *
4
+ * Barrel exports for the webhook event-driven trigger system.
5
+ *
6
+ * @see docs/design-docs/report-store/visibility-workflows.md
7
+ */
8
+ export { handleEvalRequest } from "./eval-request-handler.js";
9
+ export type { EvalRequestHandlerConfig, EvalRequestPayload, EvalRequestResult, } from "./eval-request-handler.js";
10
+ export { createBudgetTracker } from "./budget.js";
11
+ export type { BudgetState, BudgetTracker } from "./budget.js";
12
+ export { createDebouncer } from "./debounce.js";
13
+ export type { Debouncer, DebounceCallback, DebounceState } from "./debounce.js";
14
+ export { dispatchEvaluation } from "./dispatch.js";
15
+ export type { DispatchOptions, DispatchResult } from "./dispatch.js";
16
+ export { WebhookHandler } from "./handler.js";
17
+ export type { DispatchRequest, SanityWebhookPayload, WebhookHandlerConfig, WebhookResult, } from "./types.js";
@@ -0,0 +1,12 @@
1
+ /**
2
+ * webhook/index.ts
3
+ *
4
+ * Barrel exports for the webhook event-driven trigger system.
5
+ *
6
+ * @see docs/design-docs/report-store/visibility-workflows.md
7
+ */
8
+ export { handleEvalRequest } from "./eval-request-handler.js";
9
+ export { createBudgetTracker } from "./budget.js";
10
+ export { createDebouncer } from "./debounce.js";
11
+ export { dispatchEvaluation } from "./dispatch.js";
12
+ export { WebhookHandler } from "./handler.js";
@@ -0,0 +1,109 @@
1
+ /**
2
+ * webhook/types.ts
3
+ *
4
+ * Types for the Sanity Content Lake webhook payload and the
5
+ * event-driven evaluation trigger system.
6
+ *
7
+ * @see https://www.sanity.io/docs/webhooks
8
+ * @see docs/design-docs/report-store/visibility-workflows.md
9
+ */
10
+ /** Evaluation dispatch request (sent to GitHub Actions) */
11
+ export interface DispatchRequest {
12
+ /** Feature areas to evaluate */
13
+ areas: string[];
14
+ /** The document that triggered this evaluation */
15
+ documentId?: string;
16
+ /** Document slug; WebhookHandler passes a comma-joined list when several slugs were coalesced into one debounce window */
17
+ documentSlug?: string;
18
+ /** Evaluation mode */
19
+ mode?: string;
20
+ /** Specific task IDs to evaluate */
21
+ taskIds: string[];
22
+ }
23
+ /**
24
+ * Sanity webhook payload shape.
25
+ *
26
+ * When a GROQ-powered webhook fires, Sanity POSTs a JSON body with
27
+ * the matching document and operation metadata.
28
+ */
29
+ export interface SanityWebhookPayload {
30
+ /** The ID of the dataset */
31
+ dataset?: string;
32
+ /** The IDs of the documents that triggered the webhook */
33
+ ids: {
34
+ /** The created document ID (for create operations) */
35
+ created?: string;
36
+ /** The deleted document ID (for delete operations) */
37
+ deleted?: string;
38
+ /** The updated document ID (for update operations) */
39
+ updated?: string;
40
+ };
41
+ /** The operation that triggered the webhook */
42
+ operation: "create" | "delete" | "update";
43
+ /** The ID of the project */
44
+ projectId?: string;
45
+ /** The projected document data (shape depends on webhook GROQ projection); WebhookHandler reads `slug.current` and ignores payloads without it */
46
+ result?: {
47
+ _id: string;
48
+ _type: string;
49
+ slug?: {
50
+ current: string;
51
+ };
52
+ [key: string]: unknown;
53
+ };
54
+ }
55
+ /** Configuration for the webhook handler */
56
+ export interface WebhookHandlerConfig {
57
+ /**
58
+ * Budget: maximum evaluations per day.
59
+ * When exhausted, handle() returns a "rate-limited" result and nothing is dispatched.
60
+ * Default: 20
61
+ */
62
+ dailyBudget?: number;
63
+ /**
64
+ * Debounce window in milliseconds.
65
+ * Rapid edits within this window are coalesced into a single evaluation.
66
+ * Default: 300_000 (5 minutes)
67
+ */
68
+ debounceMs?: number;
69
+ /**
70
+ * The GitHub repository to dispatch to (owner/repo format).
71
+ * Default: "sanity-labs/ai-literacy-framework"
72
+ */
73
+ githubRepo?: string;
74
+ /**
75
+ * GitHub personal access token for dispatching repository_dispatch events.
76
+ * Required for triggering evaluations.
77
+ */
78
+ githubToken: string;
79
+ /**
80
+ * Path to the eval package root (for building reverse mappings).
81
+ * Default: process.cwd()
82
+ */
83
+ rootDir?: string;
84
+ /**
85
+ * Sanity webhook secret for verifying payload authenticity.
86
+ * NOTE(review): WebhookHandler only stores this value; handle() receives no headers and performs no signature validation, so callers must verify the signature themselves.
87
+ */
88
+ webhookSecret?: string;
89
+ }
90
+ /** Result of handling a webhook; WebhookHandler.handle() itself returns only the "debounced", "ignored", and "rate-limited" variants */
91
+ export type WebhookResult = {
92
+ status: "acknowledged";
93
+ reason: string;
94
+ } | {
95
+ status: "debounced";
96
+ pendingSlugs: string[];
97
+ windowExpiresAt: string;
98
+ } | {
99
+ status: "dispatched";
100
+ areas: string[];
101
+ taskIds: string[];
102
+ } | {
103
+ status: "ignored";
104
+ reason: string;
105
+ } | {
106
+ status: "rate-limited";
107
+ dailyCount: number;
108
+ dailyBudget: number;
109
+ };
@@ -0,0 +1,10 @@
1
+ /**
2
+ * webhook/types.ts
3
+ *
4
+ * Types for the Sanity Content Lake webhook payload and the
5
+ * event-driven evaluation trigger system.
6
+ *
7
+ * @see https://www.sanity.io/docs/webhooks
8
+ * @see docs/design-docs/report-store/visibility-workflows.md
9
+ */
10
+ export {};
package/package.json ADDED
@@ -0,0 +1,72 @@
1
+ {
2
+ "name": "@sanity/ailf",
3
+ "version": "0.1.0",
4
+ "private": false,
5
+ "publishConfig": {
6
+ "access": "restricted"
7
+ },
8
+ "license": "MIT",
9
+ "repository": {
10
+ "type": "git",
11
+ "url": "git+https://github.com/sanity-labs/ai-literacy-framework.git",
12
+ "directory": "packages/eval"
13
+ },
14
+ "description": "AI Literacy Framework - Evaluation tool for Sanity documentation",
15
+ "type": "module",
16
+ "bin": {
17
+ "ailf": "./bin/ailf.js"
18
+ },
19
+ "files": [
20
+ "bin",
21
+ "dist",
22
+ "config",
23
+ "canonical",
24
+ "tasks"
25
+ ],
26
+ "scripts": {
27
+ "build": "tsc && tsx scripts/bundle-workspace-deps.ts",
28
+ "generate-configs": "tsx src/cli.ts generate-configs",
29
+ "fetch-docs": "tsx src/cli.ts fetch-docs",
30
+ "measure-retrieval": "tsx src/cli.ts measure-retrieval",
31
+ "eval": "tsx src/cli.ts eval",
32
+ "compare": "tsx src/cli.ts compare",
33
+ "grader-consistency": "tsx src/cli.ts grader consistency",
34
+ "grader-validate": "tsx src/cli.ts grader validate",
35
+ "grader-compare": "tsx src/cli.ts grader compare",
36
+ "grader-sensitivity": "tsx src/cli.ts grader sensitivity",
37
+ "calculate-scores": "tsx src/cli.ts calculate-scores",
38
+ "agent-report": "tsx src/cli.ts agent-report",
39
+ "share": "dotenv -e ../../.env -o -- promptfoo share",
40
+ "view": "dotenv -e ../../.env -o -- promptfoo view",
41
+ "cli": "tsx src/cli.ts",
42
+ "pipeline": "tsx src/cli.ts pipeline",
43
+ "validate": "tsx src/cli.ts validate",
44
+ "test": "tsx --test src/__tests__/*.test.ts",
45
+ "pr-comment": "tsx src/cli.ts pr-comment",
46
+ "coverage-audit": "tsx src/cli.ts coverage-audit",
47
+ "readiness-report": "tsx src/cli.ts readiness-report",
48
+ "discovery-report": "tsx src/cli.ts discovery-report",
49
+ "webhook-server": "tsx src/cli.ts webhook-server",
50
+ "weekly-digest": "tsx src/cli.ts weekly-digest"
51
+ },
52
+ "dependencies": {
53
+ "@google-cloud/bigquery": "^8.1.1",
54
+ "@inquirer/prompts": "^8.3.0",
55
+ "@portabletext/markdown": "^1.0.0",
56
+ "@sanity/client": "^7.3.0",
57
+ "commander": "^14.0.3",
58
+ "dotenv": "^16.4.7",
59
+ "dotenv-cli": "^11.0.0",
60
+ "js-yaml": "^4.1.0",
61
+ "promptfoo": "^0.120.24",
62
+ "zod": "^4.3.6"
63
+ },
64
+ "devDependencies": {
65
+ "@sanity/ailf-core": "workspace:*",
66
+ "@sanity/ailf-shared": "workspace:*",
67
+ "@types/js-yaml": "^4.0.9",
68
+ "@types/node": "^22.13.1",
69
+ "tsx": "^4.19.2",
70
+ "typescript": "^5.7.3"
71
+ }
72
+ }
@@ -0,0 +1,51 @@
1
+ # .expanded.agentic.yaml
2
+ #
3
+ # AUTO-GENERATED — do not edit directly.
4
+ # Gold entries only (no baseline) for agentic evaluation mode.
5
+ # Source: tasks/*.yaml (single-definition format)
6
+ # Run: pnpm generate-configs
7
+
8
+ - assert:
9
+ - type: llm-rubric
10
+ value: |-
11
+ Score task completion from 0 to 100:
12
+ - 0: Couldn't attempt — missing critical information
13
+ - 20: Attempted but fundamentally wrong approach
14
+ - 50: Partial implementation — major functional gaps
15
+ - 80: Mostly complete — minor issues or missing edge cases
16
+ - 100: Fully functional code — works as expected
17
+
18
+ Must demonstrate:
19
+ - Configures a GROQ-powered webhook
20
+ - Webhook triggers on content changes
21
+ - Includes agent integration concepts
22
+
23
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
24
+ metadata:
25
+ dimension: task-completion
26
+ maxScore: 100
27
+ - type: contains-any
28
+ value:
29
+ - webhook
30
+ - GROQ
31
+ weight: 1
32
+ - type: llm-rubric
33
+ value: |-
34
+ Score documentation coverage from 0 to 100:
35
+ - 0: Had to hallucinate/guess most implementation details
36
+ - 30: Significant gaps — filled with assumptions
37
+ - 50: Some gaps — inferred from partial information
38
+ - 80: Minor gaps — almost everything was documented
39
+ - 100: Complete coverage — all necessary info was in docs
40
+
41
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
42
+ metadata:
43
+ dimension: doc-coverage
44
+ maxScore: 100
45
+ description: Test - Perspective ref expansion (gold)
46
+ vars:
47
+ docs: file://contexts/canonical/perspective-ref-test.md
48
+ task: |
49
+ Build a webhook handler that integrates with an AI agent pipeline.
50
+ Configure a GROQ-powered webhook that triggers when blog posts are
51
+ published and sends a payload to an agent endpoint.
@@ -0,0 +1,66 @@
1
+ # .expanded.yaml
2
+ #
3
+ # AUTO-GENERATED — do not edit directly.
4
+ # Source: tasks/*.yaml (single-definition format)
5
+ # Run: pnpm generate-configs
6
+
7
+ - assert:
8
+ - type: llm-rubric
9
+ value: |-
10
+ Score task completion from 0 to 100:
11
+ - 0: Couldn't attempt — missing critical information
12
+ - 20: Attempted but fundamentally wrong approach
13
+ - 50: Partial implementation — major functional gaps
14
+ - 80: Mostly complete — minor issues or missing edge cases
15
+ - 100: Fully functional code — works as expected
16
+
17
+ Must demonstrate:
18
+ - Configures a GROQ-powered webhook
19
+ - Webhook triggers on content changes
20
+ - Includes agent integration concepts
21
+
22
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
23
+ metadata:
24
+ dimension: task-completion
25
+ maxScore: 100
26
+ - type: contains-any
27
+ value:
28
+ - webhook
29
+ - GROQ
30
+ weight: 1
31
+ - type: llm-rubric
32
+ value: |-
33
+ Score documentation coverage from 0 to 100:
34
+ - 0: Had to hallucinate/guess most implementation details
35
+ - 30: Significant gaps — filled with assumptions
36
+ - 50: Some gaps — inferred from partial information
37
+ - 80: Minor gaps — almost everything was documented
38
+ - 100: Complete coverage — all necessary info was in docs
39
+
40
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
41
+ metadata:
42
+ dimension: doc-coverage
43
+ maxScore: 100
44
+ description: Test - Perspective ref expansion (gold)
45
+ prompts:
46
+ - with-docs
47
+ vars:
48
+ docs: file://contexts/canonical/perspective-ref-test.md
49
+ task: |
50
+ Build a webhook handler that integrates with an AI agent pipeline.
51
+ Configure a GROQ-powered webhook that triggers when blog posts are
52
+ published and sends a payload to an agent endpoint.
53
+ - description: Test - Perspective ref expansion (baseline)
54
+ prompts:
55
+ - without-docs
56
+ vars:
57
+ docs: ''
58
+ task: |
59
+ Build a webhook handler that integrates with an AI agent pipeline.
60
+ Configure a GROQ-powered webhook that triggers when blog posts are
61
+ published and sends a payload to an agent endpoint.
62
+ assert:
63
+ - type: llm-rubric
64
+ value: |-
65
+ Score task completion from 0 to 100 (same criteria as above).
66
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
@@ -0,0 +1,98 @@
1
+ # tasks/frameworks.yaml
2
+ #
3
+ # Other Framework Integrations — Remix, Nuxt, etc.
4
+ #
5
+ # Each task is defined once. The pipeline auto-generates gold (with docs)
6
+ # and baseline (without docs) variants from each definition.
7
+
8
+ # ============================================================
9
+ # TASK: Remix Integration
10
+ # ============================================================
11
+ - id: remix-integration
12
+ description: "Frameworks - Remix integration with data fetching"
13
+ doc_coverage: true
14
+ canonical_docs:
15
+ - slug: displaying-content-in-a-react-router-front-end
16
+ reason: "React Router front-end content display guide"
17
+ - slug: visual-editing-with-react-router
18
+ reason: "React Router / Remix integration with visual editing"
19
+ - slug: functions-js-client
20
+ reason: "Configuring @sanity/client for data fetching"
21
+ reference_solution: reference-solutions/frameworks/remix.tsx
22
+ vars:
23
+ task: |
24
+ Integrate Sanity into a Remix application:
25
+
26
+ 1. Set up the Sanity client
27
+ 2. Create a loader that fetches blog posts using GROQ
28
+ 3. Build a route component that renders the fetched posts
29
+ 4. Handle loading and error states properly
30
+
31
+ Provide all necessary files for a working Remix + Sanity integration.
32
+ docs: file://contexts/canonical/remix-integration.md
33
+ assert:
34
+ - type: llm-rubric
35
+ template: task-completion
36
+ criteria:
37
+ - Sanity client configuration
38
+ - Remix loader function with GROQ query
39
+ - Route component using useLoaderData
40
+ - Proper typing
41
+
42
+ - type: llm-rubric
43
+ template: code-correctness
44
+ criteria:
45
+ - Modern Remix patterns (v2 conventions)
46
+ - Proper loader/component separation
47
+ - Valid GROQ queries
48
+ - No deprecated APIs
49
+
50
+ - type: contains-any
51
+ value:
52
+ - "useLoaderData"
53
+ - "loader"
54
+ weight: 1
55
+
56
+ # ============================================================
57
+ # TASK: Nuxt 3 Integration
58
+ # ============================================================
59
+ - id: nuxt-integration
60
+ description: "Frameworks - Nuxt 4 integration"
61
+ doc_coverage: true
62
+ canonical_docs:
63
+ - slug: displaying-content-in-nuxt-js
64
+ reason: "Nuxt.js front-end content display guide"
65
+ - slug: visual-editing-with-nuxt
66
+ reason: "Nuxt visual editing integration"
67
+ reference_solution: reference-solutions/frameworks/nuxt.ts
68
+ vars:
69
+ task: |
70
+ Integrate Sanity into a Nuxt 4 application:
71
+
72
+ 1. Install and configure the @nuxtjs/sanity module
73
+ 2. Create a page that fetches and displays blog posts
74
+ 3. Use Nuxt composables for data fetching
75
+
76
+ Provide all necessary configuration and component code.
77
+ docs: file://contexts/canonical/nuxt-integration.md
78
+ assert:
79
+ - type: llm-rubric
80
+ template: task-completion
81
+ criteria:
82
+ - "@nuxtjs/sanity module setup in nuxt.config.ts"
83
+ - Page component using Nuxt data fetching composables
84
+ - Sanity GROQ query
85
+
86
+ - type: llm-rubric
87
+ template: code-correctness
88
+ criteria:
89
+ - Nuxt 3 module configuration syntax
90
+ - Uses useSanityQuery or equivalent composable
91
+ - Proper Nuxt 3 patterns (not Nuxt 2)
92
+
93
+ - type: contains-any
94
+ value:
95
+ - "@nuxtjs/sanity"
96
+ - "useSanityQuery"
97
+ - "sanity:"
98
+ weight: 1