@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530) hide show
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,49 @@
1
+ # thresholds.yaml
2
+ #
3
+ # Quality thresholds for readiness gates and regression alerts.
4
+ # Each threshold defines a minimum acceptable score (regression thresholds instead define a maximum allowed drop). Violations are
5
+ # classified by severity and routed to configured sinks.
6
+ #
7
+ # Used by:
8
+ # - `pnpm pipeline --readiness` (launch readiness checklist)
9
+ # - `pnpm pipeline --publish` (severity-aware sink routing)
10
+ # - `pnpm pipeline --compare` (regression alerting)
11
+ #
12
+ # @see docs/exec-plans/active/scenario-matrix-implementation/phase-5-readiness-thresholds.md
13
+
14
+ # Global defaults (apply to all areas unless overridden)
15
+ defaults:
16
+ composite: 50 # minimum composite score
17
+ dimensions:
18
+ task-completion: 40
19
+ code-correctness: 30
20
+ doc-coverage: 30
21
+ doc-lift: 0 # minimum Doc Lift (0 = docs must not hurt)
22
+ ceiling: 40 # minimum ceiling score (doc quality floor)
23
+
24
+ # Per-area overrides (inherit from defaults, override specific values)
25
+ areas:
26
+ groq:
27
+ composite: 60 # GROQ is critical — higher bar
28
+ dimensions:
29
+ task-completion: 50
30
+ # visual-editing:
31
+ # composite: 45 # currently at 36, set achievable near-term target
32
+ # Areas not listed here use defaults
33
+
34
+ # Regression thresholds (for comparison reports)
35
+ regression:
36
+ composite: -3 # alert if composite drops more than 3 points
37
+ per-area: -5 # alert if any area drops more than 5 points
38
+ per-dimension: -8 # alert if any dimension drops more than 8 points
39
+
40
+ # Severity classification
41
+ severity:
42
+ critical: # blocks deployment, immediate notification
43
+ composite-below: 30
44
+ negative-doc-lift: true
45
+ warning: # flags for review, non-blocking
46
+ composite-below: 50
47
+ regression-exceeds: -3
48
+ info: # logged but not alerted
49
+ composite-below: 60
@@ -0,0 +1,190 @@
1
+ /**
2
+ * src/examples/index.ts — Generated example data.
3
+ *
4
+ * DO NOT EDIT — this file is generated by scripts/generate-examples.ts
5
+ * from the YAML files in packages/core/examples/.
6
+ *
7
+ * To regenerate: pnpm generate-examples
8
+ */
9
+ /** Parsed config example data (JSON-safe) */
10
+ export declare const configData: {
11
+ readonly models: readonly [{
12
+ readonly id: "openai:chat:gpt-5.2";
13
+ readonly label: "GPT 5.2";
14
+ readonly config: {
15
+ readonly temperature: 0.2;
16
+ readonly max_tokens: 4096;
17
+ };
18
+ readonly modes: readonly ["baseline", "observed", "agentic-naive", "agentic-optimized"];
19
+ }, {
20
+ readonly id: "anthropic:messages:claude-opus-4-6";
21
+ readonly label: "Claude Opus 4.6";
22
+ readonly config: {
23
+ readonly temperature: 0.2;
24
+ readonly max_tokens: 4096;
25
+ };
26
+ readonly modes: readonly ["baseline", "agentic-naive"];
27
+ }];
28
+ readonly grader: {
29
+ readonly id: "openai:gpt-5-2025-08-07";
30
+ readonly label: "GPT-5 (grader)";
31
+ };
32
+ readonly maxConcurrency: 32;
33
+ readonly defaults: {
34
+ readonly temperature: 0.2;
35
+ readonly max_tokens: 4096;
36
+ };
37
+ };
38
+ /** Raw YAML string for config example (preserves comments) */
39
+ export declare const configYaml = "# Example model configuration for AI Literacy Framework evaluations.\n#\n# This defines which LLMs to evaluate and which model grades the results.\n# The grader model should always be different from the evaluated models\n# (\"the judge should not be the defendant\").\n\nmodels:\n - id: \"openai:chat:gpt-5.2\"\n label: \"GPT 5.2\"\n config:\n temperature: 0.2\n max_tokens: 4096\n modes:\n - baseline\n - observed\n - agentic-naive\n - agentic-optimized\n\n - id: \"anthropic:messages:claude-opus-4-6\"\n label: \"Claude Opus 4.6\"\n config:\n temperature: 0.2\n max_tokens: 4096\n modes:\n - baseline\n - agentic-naive\n\ngrader:\n id: \"openai:gpt-5-2025-08-07\"\n label: \"GPT-5 (grader)\"\n\nmaxConcurrency: 32\n\ndefaults:\n temperature: 0.2\n max_tokens: 4096\n";
40
+ /** Parsed source example data (JSON-safe) */
41
+ export declare const sourceData: {
42
+ readonly name: "production";
43
+ readonly type: "sanity";
44
+ readonly projectId: "3do82whm";
45
+ readonly dataset: "next";
46
+ readonly baseUrl: "https://www.sanity.io/docs";
47
+ readonly llmsTxt: "https://www.sanity.io/docs/llms.txt";
48
+ readonly allowedOrigins: readonly ["sanity.io", "*.sanity.build"];
49
+ };
50
+ /** Raw YAML string for source example (preserves comments) */
51
+ export declare const sourceYaml = "# Example documentation source definition.\n#\n# Defines where the AI Literacy Framework fetches documentation content.\n# The framework uses these settings for both baseline (GROQ fetch) and\n# agentic (live URL) evaluation modes.\n\nname: production\ntype: sanity\nprojectId: \"3do82whm\"\ndataset: next\nbaseUrl: \"https://www.sanity.io/docs\"\nllmsTxt: \"https://www.sanity.io/docs/llms.txt\"\nallowedOrigins:\n - \"sanity.io\"\n - \"*.sanity.build\"\n";
52
+ /** Parsed rubric example data (JSON-safe) */
53
+ export declare const rubricData: {
54
+ readonly templates: {
55
+ readonly "task-completion": {
56
+ readonly dimension: "task_completion";
57
+ readonly weight: 0.5;
58
+ readonly prompt: "Evaluate whether the implementation correctly completes the assigned task.\n\nCriteria:\n{{criteria}}\n\nScore 0-100 where:\n- 0-20: Task not attempted or fundamentally wrong approach\n- 21-50: Partial implementation, major gaps\n- 51-80: Working implementation with minor issues\n- 81-100: Complete, correct implementation\n";
59
+ };
60
+ };
61
+ readonly weights: {
62
+ readonly task_completion: 0.5;
63
+ readonly code_correctness: 0.25;
64
+ readonly doc_coverage: 0.25;
65
+ };
66
+ };
67
+ /** Raw YAML string for rubric example (preserves comments) */
68
+ export declare const rubricYaml = "# Example rubric templates for LLM grading.\n#\n# Rubrics define how the grader LLM scores each dimension of a response.\n# The {{criteria}} placeholder is replaced with task-specific bullets\n# from each task's assert entries.\n#\n# Weights must sum to 1.0.\n\ntemplates:\n task-completion:\n dimension: task_completion\n weight: 0.5\n prompt: |\n Evaluate whether the implementation correctly completes the assigned task.\n\n Criteria:\n {{criteria}}\n\n Score 0-100 where:\n - 0-20: Task not attempted or fundamentally wrong approach\n - 21-50: Partial implementation, major gaps\n - 51-80: Working implementation with minor issues\n - 81-100: Complete, correct implementation\n\nweights:\n task_completion: 0.5\n code_correctness: 0.25\n doc_coverage: 0.25\n";
69
+ /** Parsed threshold example data (JSON-safe) */
70
+ export declare const thresholdData: {
71
+ readonly global: {
72
+ readonly composite: 60;
73
+ readonly dimensions: {
74
+ readonly task_completion: 55;
75
+ readonly code_correctness: 50;
76
+ readonly doc_coverage: 50;
77
+ };
78
+ readonly ceiling: 70;
79
+ readonly docLift: 10;
80
+ };
81
+ readonly areas: {
82
+ readonly groq: {
83
+ readonly composite: 65;
84
+ readonly ceiling: 75;
85
+ };
86
+ };
87
+ };
88
+ /** Raw YAML string for threshold example (preserves comments) */
89
+ export declare const thresholdYaml = "# Example quality threshold configuration.\n#\n# Thresholds define the minimum scores for readiness gates.\n# The pipeline's --readiness flag evaluates scores against these\n# thresholds and produces a go/no-go checklist.\n#\n# Global thresholds apply to all areas unless overridden per-area.\n\nglobal:\n composite: 60\n dimensions:\n task_completion: 55\n code_correctness: 50\n doc_coverage: 50\n ceiling: 70\n docLift: 10\n\nareas:\n groq:\n composite: 65\n ceiling: 75\n";
90
+ /** Parsed ailf-config example data (JSON-safe) */
91
+ export declare const ailfConfigData: {
92
+ readonly source: {
93
+ readonly projectId: "your-project-id";
94
+ readonly dataset: "production";
95
+ readonly baseUrl: "https://your-site.example.com/docs";
96
+ };
97
+ readonly triggers: {
98
+ readonly pr: {
99
+ readonly mode: "validate-only";
100
+ };
101
+ readonly "pr-task-change": {
102
+ readonly mode: "eval";
103
+ readonly paths: readonly [".ailf/**"];
104
+ };
105
+ readonly main: {
106
+ readonly mode: "eval";
107
+ readonly blocking: false;
108
+ readonly notify: true;
109
+ };
110
+ };
111
+ };
112
+ /** Raw YAML string for ailf-config example (preserves comments) */
113
+ export declare const ailfConfigYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# .ailf/config.yaml \u2014 AI Literacy Framework project configuration\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. 
Place it at .ailf/config.yaml in your project root.\n#\n# Docs: https://github.com/sanity-io/ai-literacy-framework\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n# Documentation source \u2014 where to fetch content for evaluation.\n#\n# projectId \u2014 your Sanity project ID (find it in sanity.io/manage)\n# dataset \u2014 the dataset to query (e.g., \"production\", \"staging\")\n# baseUrl \u2014 the public URL of your documentation site\n# (used by agentic mode to test agent discoverability)\nsource:\n projectId: \"your-project-id\"\n dataset: production\n baseUrl: \"https://your-site.example.com/docs\"\n\n# Trigger configuration \u2014 when evaluations run automatically.\n#\n# Each key is a trigger context. The pipeline checks which trigger\n# matches the current execution context (PR, merge, schedule, etc.)\n# and applies its settings.\n#\n# mode options:\n# validate-only \u2014 check that task YAML parses correctly (fast, no LLM calls)\n# eval \u2014 run the full evaluation pipeline\n#\n# paths \u2014 only trigger when files matching these globs change\n# blocking \u2014 if true, a failing eval blocks the PR merge\n# notify \u2014 if true, post results to configured notification channels\ntriggers:\n # On pull requests: just validate task files parse correctly\n pr:\n mode: validate-only\n\n # When .ailf/ files change in a PR: run a real evaluation\n pr-task-change:\n mode: eval\n paths: [\".ailf/**\"]\n\n # On merge to main: run evaluation (non-blocking)\n main:\n mode: eval\n blocking: false\n notify: true\n";
114
+ /** Parsed task data for example-groq-blog-listing (JSON-safe) */
115
+ export declare const exampleGroqBlogListingData: readonly [{
116
+ readonly id: "example-groq-blog-listing";
117
+ readonly description: "Example — Blog listing with GROQ queries";
118
+ readonly canonical_docs: readonly [{
119
+ readonly slug: "groq-introduction";
120
+ readonly reason: "Core GROQ syntax and query language reference";
121
+ }, {
122
+ readonly slug: "how-queries-work";
123
+ readonly reason: "Query execution model and best practices";
124
+ }];
125
+ readonly doc_coverage: true;
126
+ readonly reference_solution: "canonical/example-groq-blog-listing.ts";
127
+ readonly vars: {
128
+ readonly task: "Create a Next.js page component that lists blog posts from Sanity\nusing GROQ. The page should display the title, slug, and published\ndate for each post, sorted by most recent first. Use the Sanity\nclient to fetch data.\n";
129
+ readonly docs: "";
130
+ };
131
+ readonly assert: readonly [{
132
+ readonly type: "llm-rubric";
133
+ readonly template: "task-completion";
134
+ readonly criteria: readonly ["Uses the groq tagged template literal", "Fetches blog posts with title, slug, and publishedAt fields", "Orders results by publishedAt in descending order"];
135
+ }, {
136
+ readonly type: "llm-rubric";
137
+ readonly template: "code-correctness";
138
+ readonly criteria: readonly ["Uses createClient from @sanity/client or next-sanity", "Exports a valid Next.js page component"];
139
+ }];
140
+ readonly baseline: {
141
+ readonly enabled: true;
142
+ readonly rubric: "abbreviated";
143
+ };
144
+ }];
145
+ /** Raw YAML string for example-groq-blog-listing (preserves comments) */
146
+ export declare const exampleGroqBlogListingYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Blog listing with GROQ queries\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This is a starter template \u2014 edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n# To disable this task without deleting the file, set:\n# baseline:\n# enabled: false\n#\n# Full field reference:\n# https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n# Unique identifier \u2014 lowercase alphanumeric with hyphens.\n# Must be unique across all task files in .ailf/tasks/.\n- id: example-groq-blog-listing\n\n # Short human-readable summary. 
Shown in score tables and reports.\n description: \"Example \u2014 Blog listing with GROQ queries\"\n\n # Feature area this task belongs to. Tasks with the same area are\n # grouped together in score summaries. Use a short kebab-case name.\n # featureArea is inferred from the filename by default, but you can\n # set it explicitly here.\n # featureArea: groq\n\n # Gold-standard documentation articles for this task. The pipeline\n # fetches these from Sanity and injects them into the prompt for\n # baseline evaluation. Each entry needs:\n # slug \u2014 the article's URL slug in your docs site\n # reason \u2014 why this doc is relevant (helps with auditing)\n canonical_docs:\n - slug: groq-introduction\n reason: \"Core GROQ syntax and query language reference\"\n - slug: how-queries-work\n reason: \"Query execution model and best practices\"\n\n # When true, the pipeline auto-generates an additional rubric that\n # checks whether the LLM's response actually used the provided docs.\n doc_coverage: true\n\n # Path to a gold-standard implementation, relative to canonical/.\n # The grader uses this as a reference when scoring code correctness.\n reference_solution: canonical/example-groq-blog-listing.ts\n\n # vars.task \u2014 the implementation prompt given to the LLM.\n # Write this as if you're asking a developer to build the feature.\n # Be specific about requirements so the grader can evaluate clearly.\n #\n # vars.docs \u2014 leave empty (\"\"). The pipeline fills this in:\n # \u2022 Gold variant: injected with canonical doc content\n # \u2022 Baseline variant: left empty (tests model knowledge alone)\n vars:\n task: |\n Create a Next.js page component that lists blog posts from Sanity\n using GROQ. The page should display the title, slug, and published\n date for each post, sorted by most recent first. 
Use the Sanity\n client to fetch data.\n docs: \"\"\n\n # Grading assertions \u2014 how the LLM's response is scored.\n #\n # \"llm-rubric\" assertions use a grader LLM to score against criteria.\n # The \"template\" references a rubric from config/rubrics.yaml.\n # The \"criteria\" are task-specific bullets injected into the template.\n #\n # Available templates:\n # task-completion \u2014 did the LLM implement the feature? (weight: 0.50)\n # code-correctness \u2014 is the code idiomatic and correct? (weight: 0.25)\n #\n # You can also use value-based assertions:\n # - type: contains\n # value: \"client.fetch\"\n # - type: contains-any\n # value: [\"createClient\", \"sanityClient\"]\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Uses the groq tagged template literal\"\n - \"Fetches blog posts with title, slug, and publishedAt fields\"\n - \"Orders results by publishedAt in descending order\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses createClient from @sanity/client or next-sanity\"\n - \"Exports a valid Next.js page component\"\n\n # Baseline variant configuration.\n # enabled \u2014 set to false to skip this task entirely\n # rubric \u2014 \"abbreviated\" (faster, default), \"full\", or \"none\"\n baseline:\n enabled: true\n rubric: abbreviated\n";
147
+ /** Parsed task data for example-studio-custom-input (JSON-safe) */
148
+ export declare const exampleStudioCustomInputData: readonly [{
149
+ readonly id: "example-studio-custom-input";
150
+ readonly description: "Example — Custom input component in Sanity Studio";
151
+ readonly canonical_docs: readonly [{
152
+ readonly slug: "custom-input-components";
153
+ readonly reason: "Guide for building custom form inputs in Sanity Studio";
154
+ }];
155
+ readonly doc_coverage: true;
156
+ readonly reference_solution: "canonical/example-studio-custom-input.ts";
157
+ readonly vars: {
158
+ readonly task: "Build a custom string input component for Sanity Studio that shows\na character count below the input field. The component should accept\na maxLength option from the field schema and display a warning when\nthe text exceeds the limit.\n";
159
+ readonly docs: "";
160
+ };
161
+ readonly assert: readonly [{
162
+ readonly type: "llm-rubric";
163
+ readonly template: "task-completion";
164
+ readonly criteria: readonly ["Implements a React component that renders a text input", "Displays a live character count", "Reads maxLength from schema options", "Shows a visual warning when limit is exceeded"];
165
+ }, {
166
+ readonly type: "llm-rubric";
167
+ readonly template: "code-correctness";
168
+ readonly criteria: readonly ["Uses the Sanity UI library for styling", "Calls onChange with patch operations"];
169
+ }];
170
+ readonly baseline: {
171
+ readonly enabled: true;
172
+ readonly rubric: "abbreviated";
173
+ };
174
+ }];
175
+ /** Raw YAML string for example-studio-custom-input (preserves comments) */
176
+ export declare const exampleStudioCustomInputYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Custom input component in Sanity Studio\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This is a starter template \u2014 edit it for your own documentation.\n# Delete this file or replace it with your own tasks.\n#\n# To disable without deleting:\n# baseline:\n# enabled: false\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n- id: example-studio-custom-input\n description: \"Example \u2014 Custom input component in Sanity Studio\"\n\n canonical_docs:\n - slug: custom-input-components\n reason: \"Guide for building custom form inputs in Sanity Studio\"\n\n doc_coverage: true\n reference_solution: canonical/example-studio-custom-input.ts\n\n vars:\n task: |\n Build a custom string input component for Sanity Studio that shows\n a 
character count below the input field. The component should accept\n a maxLength option from the field schema and display a warning when\n the text exceeds the limit.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Implements a React component that renders a text input\"\n - \"Displays a live character count\"\n - \"Reads maxLength from schema options\"\n - \"Shows a visual warning when limit is exceeded\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses the Sanity UI library for styling\"\n - \"Calls onChange with patch operations\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n";
177
+ /** All task example data as a flat array (JSON-safe) */
178
+ export declare const allTaskData: readonly unknown[];
179
+ /** Map of task ID (filename stem) → raw YAML string (preserves comments) */
180
+ export declare const taskYamlFiles: Record<string, string>;
181
+ /** List of task file stems, in alphabetical order */
182
+ export declare const TASK_FILE_NAMES: readonly ["example-groq-blog-listing", "example-studio-custom-input"];
183
+ export type ExampleType = "config" | "source" | "rubric" | "threshold" | "ailf-config" | "task";
184
+ export declare const EXAMPLE_TYPES: readonly ExampleType[];
185
+ export interface ExampleRecord {
186
+ description: string;
187
+ example: unknown;
188
+ yaml: string;
189
+ }
190
+ export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
@@ -0,0 +1,285 @@
1
+ /**
2
+ * src/examples/index.ts — Generated example data.
3
+ *
4
+ * DO NOT EDIT — this file is generated by scripts/generate-examples.ts
5
+ * from the YAML files in packages/core/examples/.
6
+ *
7
+ * To regenerate: pnpm generate-examples
8
+ */
9
+ // ---------------------------------------------------------------------------
10
+ // Model configuration for evaluation (config/models.yaml format)
11
+ // ---------------------------------------------------------------------------
12
+ /** Parsed config example data (JSON-safe) */
13
+ export const configData = {
14
+ "models": [
15
+ {
16
+ "id": "openai:chat:gpt-5.2",
17
+ "label": "GPT 5.2",
18
+ "config": {
19
+ "temperature": 0.2,
20
+ "max_tokens": 4096
21
+ },
22
+ "modes": [
23
+ "baseline",
24
+ "observed",
25
+ "agentic-naive",
26
+ "agentic-optimized"
27
+ ]
28
+ },
29
+ {
30
+ "id": "anthropic:messages:claude-opus-4-6",
31
+ "label": "Claude Opus 4.6",
32
+ "config": {
33
+ "temperature": 0.2,
34
+ "max_tokens": 4096
35
+ },
36
+ "modes": [
37
+ "baseline",
38
+ "agentic-naive"
39
+ ]
40
+ }
41
+ ],
42
+ "grader": {
43
+ "id": "openai:gpt-5-2025-08-07",
44
+ "label": "GPT-5 (grader)"
45
+ },
46
+ "maxConcurrency": 32,
47
+ "defaults": {
48
+ "temperature": 0.2,
49
+ "max_tokens": 4096
50
+ }
51
+ };
52
+ /** Raw YAML string for config example (preserves comments) */
53
+ export const configYaml = "# Example model configuration for AI Literacy Framework evaluations.\n#\n# This defines which LLMs to evaluate and which model grades the results.\n# The grader model should always be different from the evaluated models\n# (\"the judge should not be the defendant\").\n\nmodels:\n - id: \"openai:chat:gpt-5.2\"\n label: \"GPT 5.2\"\n config:\n temperature: 0.2\n max_tokens: 4096\n modes:\n - baseline\n - observed\n - agentic-naive\n - agentic-optimized\n\n - id: \"anthropic:messages:claude-opus-4-6\"\n label: \"Claude Opus 4.6\"\n config:\n temperature: 0.2\n max_tokens: 4096\n modes:\n - baseline\n - agentic-naive\n\ngrader:\n id: \"openai:gpt-5-2025-08-07\"\n label: \"GPT-5 (grader)\"\n\nmaxConcurrency: 32\n\ndefaults:\n temperature: 0.2\n max_tokens: 4096\n";
54
+ // ---------------------------------------------------------------------------
55
+ // Documentation source definition (config/sources.yaml format)
56
+ // ---------------------------------------------------------------------------
57
+ /** Parsed source example data (JSON-safe) */
58
+ export const sourceData = {
59
+ "name": "production",
60
+ "type": "sanity",
61
+ "projectId": "3do82whm",
62
+ "dataset": "next",
63
+ "baseUrl": "https://www.sanity.io/docs",
64
+ "llmsTxt": "https://www.sanity.io/docs/llms.txt",
65
+ "allowedOrigins": [
66
+ "sanity.io",
67
+ "*.sanity.build"
68
+ ]
69
+ };
70
+ /** Raw YAML string for source example (preserves comments) */
71
+ export const sourceYaml = "# Example documentation source definition.\n#\n# Defines where the AI Literacy Framework fetches documentation content.\n# The framework uses these settings for both baseline (GROQ fetch) and\n# agentic (live URL) evaluation modes.\n\nname: production\ntype: sanity\nprojectId: \"3do82whm\"\ndataset: next\nbaseUrl: \"https://www.sanity.io/docs\"\nllmsTxt: \"https://www.sanity.io/docs/llms.txt\"\nallowedOrigins:\n - \"sanity.io\"\n - \"*.sanity.build\"\n";
72
+ // ---------------------------------------------------------------------------
73
+ // Rubric templates for LLM grading (config/rubrics.yaml format)
74
+ // ---------------------------------------------------------------------------
75
+ /** Parsed rubric example data (JSON-safe) */
76
+ export const rubricData = {
77
+ "templates": {
78
+ "task-completion": {
79
+ "dimension": "task_completion",
80
+ "weight": 0.5,
81
+ "prompt": "Evaluate whether the implementation correctly completes the assigned task.\n\nCriteria:\n{{criteria}}\n\nScore 0-100 where:\n- 0-20: Task not attempted or fundamentally wrong approach\n- 21-50: Partial implementation, major gaps\n- 51-80: Working implementation with minor issues\n- 81-100: Complete, correct implementation\n"
82
+ }
83
+ },
84
+ "weights": {
85
+ "task_completion": 0.5,
86
+ "code_correctness": 0.25,
87
+ "doc_coverage": 0.25
88
+ }
89
+ };
90
+ /** Raw YAML string for rubric example (preserves comments) */
91
+ export const rubricYaml = "# Example rubric templates for LLM grading.\n#\n# Rubrics define how the grader LLM scores each dimension of a response.\n# The {{criteria}} placeholder is replaced with task-specific bullets\n# from each task's assert entries.\n#\n# Weights must sum to 1.0.\n\ntemplates:\n task-completion:\n dimension: task_completion\n weight: 0.5\n prompt: |\n Evaluate whether the implementation correctly completes the assigned task.\n\n Criteria:\n {{criteria}}\n\n Score 0-100 where:\n - 0-20: Task not attempted or fundamentally wrong approach\n - 21-50: Partial implementation, major gaps\n - 51-80: Working implementation with minor issues\n - 81-100: Complete, correct implementation\n\nweights:\n task_completion: 0.5\n code_correctness: 0.25\n doc_coverage: 0.25\n";
92
+ // ---------------------------------------------------------------------------
93
+ // Quality threshold configuration (config/thresholds.yaml format)
94
+ // ---------------------------------------------------------------------------
95
+ /** Parsed threshold example data (JSON-safe) */
96
+ export const thresholdData = {
97
+ "global": {
98
+ "composite": 60,
99
+ "dimensions": {
100
+ "task_completion": 55,
101
+ "code_correctness": 50,
102
+ "doc_coverage": 50
103
+ },
104
+ "ceiling": 70,
105
+ "docLift": 10
106
+ },
107
+ "areas": {
108
+ "groq": {
109
+ "composite": 65,
110
+ "ceiling": 75
111
+ }
112
+ }
113
+ };
114
+ /** Raw YAML string for threshold example (preserves comments) */
115
+ export const thresholdYaml = "# Example quality threshold configuration.\n#\n# Thresholds define the minimum scores for readiness gates.\n# The pipeline's --readiness flag evaluates scores against these\n# thresholds and produces a go/no-go checklist.\n#\n# Global thresholds apply to all areas unless overridden per-area.\n\nglobal:\n composite: 60\n dimensions:\n task_completion: 55\n code_correctness: 50\n doc_coverage: 50\n ceiling: 70\n docLift: 10\n\nareas:\n groq:\n composite: 65\n ceiling: 75\n";
116
+ // ---------------------------------------------------------------------------
117
+ // Project configuration for .ailf/config.yaml
118
+ // ---------------------------------------------------------------------------
119
+ /** Parsed ailf-config example data (JSON-safe) */
120
+ export const ailfConfigData = {
121
+ "source": {
122
+ "projectId": "your-project-id",
123
+ "dataset": "production",
124
+ "baseUrl": "https://your-site.example.com/docs"
125
+ },
126
+ "triggers": {
127
+ "pr": {
128
+ "mode": "validate-only"
129
+ },
130
+ "pr-task-change": {
131
+ "mode": "eval",
132
+ "paths": [
133
+ ".ailf/**"
134
+ ]
135
+ },
136
+ "main": {
137
+ "mode": "eval",
138
+ "blocking": false,
139
+ "notify": true
140
+ }
141
+ }
142
+ };
143
+ /** Raw YAML string for ailf-config example (preserves comments) */
144
+ export const ailfConfigYaml = "# ──────────────────────────────────────────────────────────────────────\n# .ailf/config.yaml — AI Literacy Framework project configuration\n# ──────────────────────────────────────────────────────────────────────\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Docs: https://github.com/sanity-io/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\n# Documentation source — where to fetch content for evaluation.\n#\n# projectId — your Sanity project ID (find it in sanity.io/manage)\n# dataset — the dataset to query (e.g., \"production\", \"staging\")\n# baseUrl — the public URL of your documentation site\n# (used by agentic mode to test agent discoverability)\nsource:\n projectId: \"your-project-id\"\n dataset: production\n baseUrl: \"https://your-site.example.com/docs\"\n\n# Trigger configuration — when evaluations run automatically.\n#\n# Each key is a trigger context. The pipeline checks which trigger\n# matches the current execution context (PR, merge, schedule, etc.)\n# and applies its settings.\n#\n# mode options:\n# validate-only — check that task YAML parses correctly (fast, no LLM calls)\n# eval — run the full evaluation pipeline\n#\n# paths — only trigger when files matching these globs change\n# blocking — if true, a failing eval blocks the PR merge\n# notify — if true, post results to configured notification channels\ntriggers:\n # On pull requests: just validate task files parse correctly\n pr:\n mode: validate-only\n\n # When .ailf/ files change in a PR: run a real evaluation\n pr-task-change:\n mode: eval\n paths: [\".ailf/**\"]\n\n # On merge to main: run evaluation (non-blocking)\n main:\n mode: eval\n blocking: false\n notify: true\n";
145
+ /** Parsed task data for example-groq-blog-listing (JSON-safe) */
146
+ export const exampleGroqBlogListingData = [
147
+ {
148
+ "id": "example-groq-blog-listing",
149
+ "description": "Example — Blog listing with GROQ queries",
150
+ "canonical_docs": [
151
+ {
152
+ "slug": "groq-introduction",
153
+ "reason": "Core GROQ syntax and query language reference"
154
+ },
155
+ {
156
+ "slug": "how-queries-work",
157
+ "reason": "Query execution model and best practices"
158
+ }
159
+ ],
160
+ "doc_coverage": true,
161
+ "reference_solution": "canonical/example-groq-blog-listing.ts",
162
+ "vars": {
163
+ "task": "Create a Next.js page component that lists blog posts from Sanity\nusing GROQ. The page should display the title, slug, and published\ndate for each post, sorted by most recent first. Use the Sanity\nclient to fetch data.\n",
164
+ "docs": ""
165
+ },
166
+ "assert": [
167
+ {
168
+ "type": "llm-rubric",
169
+ "template": "task-completion",
170
+ "criteria": [
171
+ "Uses the groq tagged template literal",
172
+ "Fetches blog posts with title, slug, and publishedAt fields",
173
+ "Orders results by publishedAt in descending order"
174
+ ]
175
+ },
176
+ {
177
+ "type": "llm-rubric",
178
+ "template": "code-correctness",
179
+ "criteria": [
180
+ "Uses createClient from @sanity/client or next-sanity",
181
+ "Exports a valid Next.js page component"
182
+ ]
183
+ }
184
+ ],
185
+ "baseline": {
186
+ "enabled": true,
187
+ "rubric": "abbreviated"
188
+ }
189
+ }
190
+ ];
191
+ /** Raw YAML source of the example-groq-blog-listing starter task, kept as a single string so the explanatory YAML comments are preserved. Registered in taskYamlFiles under the key example-groq-blog-listing. */
192
+ export const exampleGroqBlogListingYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Blog listing with GROQ queries\n# ──────────────────────────────────────────────────────────────────────\n#\n# This is a starter template — edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n# To disable this task without deleting the file, set:\n# baseline:\n# enabled: false\n#\n# Full field reference:\n# https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md\n# ──────────────────────────────────────────────────────────────────────\n\n# Unique identifier — lowercase alphanumeric with hyphens.\n# Must be unique across all task files in .ailf/tasks/.\n- id: example-groq-blog-listing\n\n # Short human-readable summary. Shown in score tables and reports.\n description: \"Example — Blog listing with GROQ queries\"\n\n # Feature area this task belongs to. Tasks with the same area are\n # grouped together in score summaries. Use a short kebab-case name.\n # featureArea is inferred from the filename by default, but you can\n # set it explicitly here.\n # featureArea: groq\n\n # Gold-standard documentation articles for this task. The pipeline\n # fetches these from Sanity and injects them into the prompt for\n # baseline evaluation. 
Each entry needs:\n # slug — the article's URL slug in your docs site\n # reason — why this doc is relevant (helps with auditing)\n canonical_docs:\n - slug: groq-introduction\n reason: \"Core GROQ syntax and query language reference\"\n - slug: how-queries-work\n reason: \"Query execution model and best practices\"\n\n # When true, the pipeline auto-generates an additional rubric that\n # checks whether the LLM's response actually used the provided docs.\n doc_coverage: true\n\n # Path to a gold-standard implementation, relative to canonical/.\n # The grader uses this as a reference when scoring code correctness.\n reference_solution: canonical/example-groq-blog-listing.ts\n\n # vars.task — the implementation prompt given to the LLM.\n # Write this as if you're asking a developer to build the feature.\n # Be specific about requirements so the grader can evaluate clearly.\n #\n # vars.docs — leave empty (\"\"). The pipeline fills this in:\n # • Gold variant: injected with canonical doc content\n # • Baseline variant: left empty (tests model knowledge alone)\n vars:\n task: |\n Create a Next.js page component that lists blog posts from Sanity\n using GROQ. The page should display the title, slug, and published\n date for each post, sorted by most recent first. Use the Sanity\n client to fetch data.\n docs: \"\"\n\n # Grading assertions — how the LLM's response is scored.\n #\n # \"llm-rubric\" assertions use a grader LLM to score against criteria.\n # The \"template\" references a rubric from config/rubrics.yaml.\n # The \"criteria\" are task-specific bullets injected into the template.\n #\n # Available templates:\n # task-completion — did the LLM implement the feature? (weight: 0.50)\n # code-correctness — is the code idiomatic and correct? 
(weight: 0.25)\n #\n # You can also use value-based assertions:\n # - type: contains\n # value: \"client.fetch\"\n # - type: contains-any\n # value: [\"createClient\", \"sanityClient\"]\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Uses the groq tagged template literal\"\n - \"Fetches blog posts with title, slug, and publishedAt fields\"\n - \"Orders results by publishedAt in descending order\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses createClient from @sanity/client or next-sanity\"\n - \"Exports a valid Next.js page component\"\n\n # Baseline variant configuration.\n # enabled — set to false to skip this task entirely\n # rubric — \"abbreviated\" (faster, default), \"full\", or \"none\"\n baseline:\n enabled: true\n rubric: abbreviated\n";
193
+ /** Parsed task data for example-studio-custom-input (JSON-safe): an array with one task object whose fields (id, description, canonical_docs, doc_coverage, reference_solution, vars, assert, baseline) carry the same values as the YAML text in exampleStudioCustomInputYaml. */
194
+ export const exampleStudioCustomInputData = [
195
+ {
196
+ "id": "example-studio-custom-input",
197
+ "description": "Example — Custom input component in Sanity Studio",
198
+ "canonical_docs": [
199
+ {
200
+ "slug": "custom-input-components",
201
+ "reason": "Guide for building custom form inputs in Sanity Studio"
202
+ }
203
+ ],
204
+ "doc_coverage": true,
205
+ "reference_solution": "canonical/example-studio-custom-input.ts",
206
+ "vars": {
207
+ "task": "Build a custom string input component for Sanity Studio that shows\na character count below the input field. The component should accept\na maxLength option from the field schema and display a warning when\nthe text exceeds the limit.\n",
208
+ "docs": ""
209
+ },
210
+ "assert": [
211
+ {
212
+ "type": "llm-rubric",
213
+ "template": "task-completion",
214
+ "criteria": [
215
+ "Implements a React component that renders a text input",
216
+ "Displays a live character count",
217
+ "Reads maxLength from schema options",
218
+ "Shows a visual warning when limit is exceeded"
219
+ ]
220
+ },
221
+ {
222
+ "type": "llm-rubric",
223
+ "template": "code-correctness",
224
+ "criteria": [
225
+ "Uses the Sanity UI library for styling",
226
+ "Calls onChange with patch operations"
227
+ ]
228
+ }
229
+ ],
230
+ "baseline": {
231
+ "enabled": true,
232
+ "rubric": "abbreviated"
233
+ }
234
+ }
235
+ ];
236
+ /** Raw YAML source of the example-studio-custom-input starter task, kept as a single string so the YAML comments are preserved. Registered in taskYamlFiles under the key example-studio-custom-input. */
237
+ export const exampleStudioCustomInputYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Custom input component in Sanity Studio\n# ──────────────────────────────────────────────────────────────────────\n#\n# This is a starter template — edit it for your own documentation.\n# Delete this file or replace it with your own tasks.\n#\n# To disable without deleting:\n# baseline:\n# enabled: false\n# ──────────────────────────────────────────────────────────────────────\n\n- id: example-studio-custom-input\n description: \"Example — Custom input component in Sanity Studio\"\n\n canonical_docs:\n - slug: custom-input-components\n reason: \"Guide for building custom form inputs in Sanity Studio\"\n\n doc_coverage: true\n reference_solution: canonical/example-studio-custom-input.ts\n\n vars:\n task: |\n Build a custom string input component for Sanity Studio that shows\n a character count below the input field. The component should accept\n a maxLength option from the field schema and display a warning when\n the text exceeds the limit.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Implements a React component that renders a text input\"\n - \"Displays a live character count\"\n - \"Reads maxLength from schema options\"\n - \"Shows a visual warning when limit is exceeded\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses the Sanity UI library for styling\"\n - \"Calls onChange with patch operations\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n";
238
+ // ---------------------------------------------------------------------------
239
+ // Aggregate task exports
240
+ // ---------------------------------------------------------------------------
241
+ /** All task example data as a flat array (JSON-safe): the entries of exampleGroqBlogListingData followed by the entries of exampleStudioCustomInputData. The spread copies references only, so each entry is the same object that the per-task array holds. */
242
+ export const allTaskData = [
243
+ ...exampleGroqBlogListingData,
244
+ ...exampleStudioCustomInputData,
245
+ ];
246
+ /** Map of task ID (filename stem) → raw YAML string (preserves comments). Keys are listed in the same order as TASK_FILE_NAMES; EXAMPLES.task.yaml joins these values in key order. */
247
+ export const taskYamlFiles = {
248
+ "example-groq-blog-listing": exampleGroqBlogListingYaml,
249
+ "example-studio-custom-input": exampleStudioCustomInputYaml,
250
+ };
251
+ /** List of task file stems, in alphabetical order. Each stem is also a key of taskYamlFiles. */
252
+ export const TASK_FILE_NAMES = ["example-groq-blog-listing", "example-studio-custom-input"];
253
+ /** Names of the available example types: the same strings, in the same order, as the keys of EXAMPLES. */ export const EXAMPLE_TYPES = ["config", "source", "rubric", "threshold", "ailf-config", "task"];
254
+ /** Bundled examples keyed by example type (the keys match EXAMPLE_TYPES). Each entry provides description (one-line summary), example (the example data) and yaml (YAML text). NOTE(review): configData, configYaml and the other non-task values are defined outside this excerpt. */ export const EXAMPLES = {
255
+ "config": {
256
+ description: "Model configuration for evaluation (config/models.yaml format)",
257
+ example: configData,
258
+ yaml: configYaml,
259
+ },
260
+ "source": {
261
+ description: "Documentation source definition (config/sources.yaml format)",
262
+ example: sourceData,
263
+ yaml: sourceYaml,
264
+ },
265
+ "rubric": {
266
+ description: "Rubric templates for LLM grading (config/rubrics.yaml format)",
267
+ example: rubricData,
268
+ yaml: rubricYaml,
269
+ },
270
+ "threshold": {
271
+ description: "Quality threshold configuration (config/thresholds.yaml format)",
272
+ example: thresholdData,
273
+ yaml: thresholdYaml,
274
+ },
275
+ "ailf-config": {
276
+ description: "Project configuration for .ailf/config.yaml",
277
+ example: ailfConfigData,
278
+ yaml: ailfConfigYaml,
279
+ },
280
+ "task": {
281
+ description: "Task definitions for evaluating AI implementation of Sanity features",
282
+ example: allTaskData,
283
+ yaml: Object.values(taskYamlFiles).join("\n"), // every task YAML file concatenated in taskYamlFiles key order, one newline between files
284
+ },
285
+ };
@@ -0,0 +1,17 @@
1
+ /**
2
+ * @sanity/ailf-core — Domain kernel for the AI Literacy Framework.
3
+ *
4
+ * This package contains:
5
+ * - Domain types (Report, ScoreSummary, FeatureScore, etc.)
6
+ * - Zod validation schemas (task definitions, rubrics, config)
7
+ * - Port interfaces (TaskSource, DocFetcher, EvalRunner, etc.)
8
+ * - Pure domain services (score calculation, comparison, etc.)
9
+ *
10
+ * Design rule: this package has ZERO imports from @sanity/ailf
11
+ * or @sanity/ailf-studio. Dependencies flow inward only.
12
+ */
13
+ export * from "./types/index.js";
14
+ export * from "./schemas/index.js";
15
+ export * from "./ports/index.js";
16
+ export * from "./services/index.js";
17
+ export * from "./examples/index.js";
@@ -0,0 +1,17 @@
1
+ /**
2
+ * @sanity/ailf-core — Domain kernel for the AI Literacy Framework.
3
+ *
4
+ * This package contains:
5
+ * - Domain types (Report, ScoreSummary, FeatureScore, etc.)
6
+ * - Zod validation schemas (task definitions, rubrics, config)
7
+ * - Port interfaces (TaskSource, DocFetcher, EvalRunner, etc.)
8
+ * - Pure domain services (score calculation, comparison, etc.)
9
+ *
10
+ * Design rule: this package has ZERO imports from @sanity/ailf
11
+ * or @sanity/ailf-studio. Dependencies flow inward only.
12
+ */
13
+ export * from "./types/index.js";
14
+ export * from "./schemas/index.js";
15
+ export * from "./ports/index.js";
16
+ export * from "./services/index.js";
17
+ export * from "./examples/index.js";