@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530) hide show
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
package/README.md ADDED
@@ -0,0 +1,89 @@
1
+ # @sanity/ailf
2
+
3
+ CLI and evaluation engine for the **AI Literacy Framework** — measures how
4
+ effectively documentation enables AI coding tools to implement features
5
+ correctly.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ # Run without installing (recommended for quick start)
11
+ npx @sanity/ailf --help
12
+
13
+ # Or install globally
14
+ pnpm add -g @sanity/ailf
15
+
16
+ # Or as a project dependency
17
+ pnpm add @sanity/ailf
18
+ ```
19
+
20
+ ## Quick start
21
+
22
+ ### 1. Initialize a project
23
+
24
+ ```bash
25
+ npx @sanity/ailf init
26
+ ```
27
+
28
+ This creates a `.ailf/` directory with example configuration and task files:
29
+
30
+ ```
31
+ .ailf/
32
+ ├── config.yaml # Project configuration
33
+ ├── .gitignore # Keeps generated files out of VCS
34
+ └── tasks/
35
+     ├── example-groq-blog-listing.yaml
36
+     └── example-studio-custom-input.yaml
37
+ ```
38
+
39
+ ### 2. Set up environment
40
+
41
+ Create a `.env` file in your project root:
42
+
43
+ ```bash
44
+ # Required — LLM provider for evaluation and grading
45
+ OPENAI_API_KEY=sk-...
46
+
47
+ # Required — read access to Sanity documentation content
48
+ SANITY_API_TOKEN=sk...
49
+
50
+ # Optional — publish reports to your Sanity Studio
51
+ AILF_REPORT_SANITY_API_TOKEN=sk...
52
+ AILF_REPORT_PROJECT_ID=your-project-id
53
+ AILF_REPORT_DATASET=production
54
+ ```
55
+
56
+ ### 3. Edit tasks and run
57
+
58
+ ```bash
59
+ # Edit .ailf/config.yaml with your Sanity project settings
60
+ # Customize or replace the example tasks in .ailf/tasks/
61
+
62
+ # Validate task definitions
63
+ npx @sanity/ailf validate-tasks .ailf/tasks/
64
+
65
+ # Run evaluation in debug mode (fast feedback)
66
+ npx @sanity/ailf pipeline --repo-tasks-path .ailf/tasks/ --debug
67
+
68
+ # Full evaluation
69
+ npx @sanity/ailf pipeline --repo-tasks-path .ailf/tasks/
70
+ ```
71
+
72
+ ## Documentation
73
+
74
+ - **[API Reference](https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/API.md)**
75
+ — all commands, flags, and environment variables
76
+ - **[Contributing Tasks](https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md)**
77
+ — task authoring guide
78
+ - **[Architecture](https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/ARCHITECTURE.md)**
79
+ — domain model and data flow
80
+ - **[Root README](https://github.com/sanity-labs/ai-literacy-framework)** —
81
+ project overview
82
+
83
+ ## Related packages
84
+
85
+ | Package | Description |
86
+ | -------------------------------------------------------------------------- | -------------------------------------------------- |
87
+ | [`@sanity/ailf-studio`](https://www.npmjs.com/package/@sanity/ailf-studio) | Sanity Studio dashboard plugin for viewing reports |
88
+ | `@sanity/ailf-core` | Domain kernel (types, schemas, ports) |
89
+ | `@sanity/ailf-shared` | Cross-package contract types |
package/bin/ailf.js ADDED
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * ailf — CLI launcher for the AI Literacy Framework.
5
+ *
6
+ * Runs in two modes:
7
+ * 1. **Development** (monorepo) — delegates to TypeScript source via tsx.
8
+ * Detected by the presence of src/cli.ts.
9
+ * 2. **Production** (npm install) — imports the compiled dist/cli.js directly.
10
+ * This is the path when installed via `npx @sanity/ailf-eval` or
11
+ * `pnpm add @sanity/ailf-eval`.
12
+ */
13
+
14
+ import { existsSync } from "fs"
15
+ import { execFileSync } from "child_process"
16
+ import { dirname, resolve } from "path"
17
+ import { fileURLToPath } from "url"
18
+
19
+ const __dirname = dirname(fileURLToPath(import.meta.url))
20
+ const ROOT = resolve(__dirname, "..")
21
+ const tsSrc = resolve(ROOT, "src", "cli.ts")
22
+ const jsDist = resolve(ROOT, "dist", "cli.js")
23
+
24
+ // Forward all arguments after the script name
25
+ const args = process.argv.slice(2)
26
+
27
+ // Preserve the caller's working directory so commands like `ailf init`
28
+ // operate on the user's cwd, not the eval package root.
29
+ const callerCwd = process.cwd()
30
+
31
+ // ---------------------------------------------------------------------------
32
+ // Mode 1: Development — tsx + TypeScript source (no build step needed)
33
+ // ---------------------------------------------------------------------------
34
+ if (existsSync(tsSrc)) {
35
+ try {
36
+ execFileSync("npx", ["tsx", tsSrc, ...args], {
37
+ cwd: ROOT,
38
+ env: { ...process.env, AILF_CALLER_CWD: callerCwd },
39
+ stdio: "inherit",
40
+ })
41
+ process.exit(0)
42
+ } catch (err) {
43
+ // execFileSync throws on non-zero exit codes — propagate them
44
+ const code =
45
+ err !== null && typeof err === "object" && "status" in err
46
+ ? err.status
47
+ : 1
48
+ process.exit(code)
49
+ }
50
+ }
51
+
52
+ // ---------------------------------------------------------------------------
53
+ // Mode 2: Production — compiled JS (npm install / npx)
54
+ // ---------------------------------------------------------------------------
55
+ if (existsSync(jsDist)) {
56
+ // Set AILF_CALLER_CWD so cli.ts resolves paths relative to the caller
57
+ process.env.AILF_CALLER_CWD = callerCwd
58
+ await import(jsDist)
59
+ } else {
60
+ console.error(
61
+ "ailf: Cannot find CLI entry point. Run 'pnpm build' or ensure tsx is available."
62
+ )
63
+ process.exit(1)
64
+ }
@@ -0,0 +1,88 @@
1
+ # Grader References — Human-Graded Gold Standard
2
+
3
+ > Human expert grades for validating LLM grader accuracy.
4
+
5
+ ## Purpose
6
+
7
+ These YAML files contain **human-graded reference samples** — real LLM responses
8
+ paired with expert human scores across all three dimensions (Task Completion,
9
+ Code Correctness, Doc Coverage). The grader validation tool
10
+ (`pnpm grader-validate`) runs the current grader on these responses and compares
11
+ its scores against the human grades.
12
+
13
+ ## File format
14
+
15
+ Each YAML file contains an array of reference-graded samples:
16
+
17
+ ```yaml
18
+ - taskId: groq-blog-queries
19
+ area: groq
20
+ response: |
21
+ <the LLM's actual response text from an eval run>
22
+ rubrics:
23
+ - dimension: task-completion
24
+ rubricText: |
25
+ <the full rubric text used for grading>
26
+ humanScore: 75
27
+ notes: "Correct GROQ syntax but missed ordering clause"
28
+ - dimension: code-correctness
29
+ rubricText: |
30
+ <the full rubric text>
31
+ humanScore: 60
32
+ - dimension: doc-coverage
33
+ rubricText: |
34
+ <the full rubric text>
35
+ humanScore: 80
36
+ gradedBy: "engineer-name"
37
+ gradedAt: "2026-03-10"
38
+ ```
39
+
40
+ ## Grading protocol
41
+
42
+ To minimize subjectivity when creating reference grades:
43
+
44
+ 1. **Use the same rubric text** the LLM grader uses (copy from the expanded task
45
+ file or the eval results JSON)
46
+ 2. **Two independent graders per sample**, reconcile disagreements
47
+ 3. **Focus on edge cases** — samples where the grader is likely to be wrong
48
+ (subtle bugs, plausible but incorrect code, partially correct responses)
49
+ 4. **Score on the 0–100 scale** matching the rubric bands:
50
+ - 0–19: Failed / missing critical info
51
+ - 20–49: Attempted but fundamentally wrong
52
+ - 50–79: Partial / mostly correct
53
+ - 80–100: Complete / fully correct
54
+ 5. **Include notes** explaining your reasoning, especially for non-obvious
55
+ scores. These help diagnose grader failures.
56
+
57
+ ## How to extract responses from eval results
58
+
59
+ ```bash
60
+ # View a specific gold test response from eval results:
61
+ python3 -c "
62
+ import json
63
+ with open('results/latest/eval-results.json') as f:
64
+ data = json.load(f)
65
+ for r in data['results']['results']:
66
+ desc = r.get('testCase',{}).get('description','')
67
+ if 'groq-blog-queries' in desc.lower() and '(gold)' in desc:
68
+ print(r['response']['output'][:2000])
69
+ break
70
+ "
71
+ ```
72
+
73
+ ## Coverage goals
74
+
75
+ Start with 2–3 reference-graded samples per feature area, covering:
76
+
77
+ - A high-quality response (scores mostly 80+)
78
+ - A medium-quality response (scores 40–70)
79
+ - A low-quality or incorrect response (scores 0–30)
80
+
81
+ This gives the validation tool a range of quality levels to check the grader's
82
+ discrimination power across the full scale.
83
+
84
+ ## Related
85
+
86
+ - [Grader Reliability Plan](../../../docs/exec-plans/completed/grader-reliability.md)
87
+ — Phase 2
88
+ - [Rubric Templates](../../config/rubrics.yaml) — the rubrics used for grading
@@ -0,0 +1,234 @@
1
+ # groq.yaml — Human-graded reference samples for GROQ tasks
2
+ #
3
+ # These are real LLM responses from eval runs, graded by a human expert.
4
+ # Used by `pnpm grader-validate` to measure grader accuracy.
5
+
6
+ - taskId: groq-blog-queries
7
+ area: groq
8
+ response: |
9
+ To write GROQ queries for a Sanity blog application using `@sanity/client`, here is a complete TypeScript implementation:
10
+
11
+ ```typescript
12
+ import { createClient } from "@sanity/client";
13
+
14
+ const client = createClient({
15
+ projectId: "your-project-id",
16
+ dataset: "production",
17
+ apiVersion: "2024-01-01",
18
+ useCdn: true,
19
+ });
20
+
21
+ // Types
22
+ interface PostSummary {
23
+ _id: string;
24
+ title: string;
25
+ slug: string;
26
+ publishedAt: string;
27
+ excerpt: string;
28
+ authorName: string;
29
+ }
30
+
31
+ // 1. Fetch all published blog posts
32
+ const allPostsQuery = `*[_type == "post"] | order(publishedAt desc) [0...10] {
33
+ _id,
34
+ title,
35
+ "slug": slug.current,
36
+ publishedAt,
37
+ excerpt,
38
+ "authorName": author->name
39
+ }`;
40
+
41
+ export async function getAllPosts(): Promise<PostSummary[]> {
42
+ return client.fetch(allPostsQuery);
43
+ }
44
+
45
+ // 2. Fetch a single post by slug
46
+ const postBySlugQuery = `*[_type == "post" && slug.current == $slug][0] {
47
+ _id,
48
+ title,
49
+ "slug": slug.current,
50
+ publishedAt,
51
+ excerpt,
52
+ "authorName": author->name,
53
+ body,
54
+ "categories": categories[]->{ title, "slug": slug.current }
55
+ }`;
56
+
57
+ export async function getPostBySlug(slug: string) {
58
+ return client.fetch(postBySlugQuery, { slug });
59
+ }
60
+
61
+ // 3. Fetch posts after a date
62
+ const recentPostsQuery = `*[_type == "post" && dateTime(publishedAt) > dateTime($since)] | order(publishedAt desc) {
63
+ _id,
64
+ title,
65
+ "slug": slug.current,
66
+ publishedAt
67
+ }`;
68
+
69
+ export async function getRecentPosts(since: string) {
70
+ return client.fetch(recentPostsQuery, { since });
71
+ }
72
+
73
+ // 4. Fetch posts by category
74
+ const postsByCategoryQuery = `*[_type == "post" && $categoryId in categories[]._ref] | order(publishedAt desc) {
75
+ _id,
76
+ title,
77
+ "slug": slug.current,
78
+ publishedAt
79
+ }`;
80
+
81
+ export async function getPostsByCategory(categoryId: string) {
82
+ return client.fetch(postsByCategoryQuery, { categoryId });
83
+ }
84
+ ```
85
+ rubrics:
86
+ - dimension: task-completion
87
+ rubricText: |
88
+ Score task completion from 0 to 100:
89
+ - 0: Couldn't attempt — missing critical information
90
+ - 20: Attempted but fundamentally wrong approach
91
+ - 50: Partial implementation — major functional gaps
92
+ - 80: Mostly complete — minor issues or missing edge cases
93
+ - 100: Fully functional code — works as expected
94
+
95
+ Must demonstrate:
96
+ - GROQ filter with _type == "post"
97
+ - Projection with aliased slug field ("slug": slug.current)
98
+ - Reference resolution with -> for author
99
+ - Ordering with | order(publishedAt desc)
100
+ - Slice/pagination syntax [0...10] or [0..9]
101
+ - Parameterized query with $slug for single post fetch
102
+ - Date filtering with dateTime() or string comparison
103
+ - Category filtering using references or array contains
104
+
105
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
106
+ humanScore: 95
107
+ notes:
108
+ "All 8 criteria met. Clean implementation with types, correct GROQ
109
+ syntax throughout."
110
+ - dimension: code-correctness
111
+ rubricText: |
112
+ Score code correctness from 0 to 100:
113
+ - 0: Broken code, syntax errors, or deprecated APIs
114
+ - 30: Works but uses anti-patterns or inefficient approaches
115
+ - 50: Works but not idiomatic
116
+ - 80: Follows most best practices
117
+ - 100: Follows all best practices, idiomatic implementation
118
+
119
+ Check for:
120
+ - Valid GROQ syntax (proper filter brackets, projection braces)
121
+ - Uses @sanity/client createClient + client.fetch()
122
+ - Correct parameter passing syntax ($param)
123
+ - Proper reference dereference with ->
124
+ - No deprecated patterns
125
+
126
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
127
+ humanScore: 90
128
+ notes:
129
+ "All GROQ syntax correct. Proper use of createClient, client.fetch,
130
+ params, and ->. Minor: could use const assertions for query strings."
131
+ - dimension: doc-coverage
132
+ rubricText: |
133
+ Score documentation coverage from 0 to 100:
134
+ - 0: Had to hallucinate/guess most implementation details
135
+ - 30: Significant gaps — filled with assumptions
136
+ - 50: Some gaps — inferred from partial information
137
+ - 80: Minor gaps — almost everything was documented
138
+ - 100: Complete coverage — all necessary info was in docs
139
+
140
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
141
+ humanScore: 85
142
+ notes:
143
+ "All patterns match documented APIs. dateTime() usage, reference syntax,
144
+ slice pagination — all from docs. Minor: apiVersion string is a
145
+ reasonable guess."
146
+ gradedBy: "senior-engineer"
147
+ gradedAt: "2026-03-10"
148
+
149
+ - taskId: groq-blog-queries
150
+ area: groq
151
+ response: |
152
+ Here are GROQ queries for a blog:
153
+
154
+ ```javascript
155
+ import sanityClient from '@sanity/client'
156
+
157
+ const client = sanityClient({
158
+ projectId: 'abc123',
159
+ dataset: 'production'
160
+ })
161
+
162
+ // Get posts
163
+ const posts = await client.fetch('*[_type == "post"]{title, body}')
164
+
165
+ // Get post by slug
166
+ const post = await client.fetch('*[_type == "post" && slug == $slug]{...}', {slug: 'my-post'})
167
+
168
+ // Get recent posts
169
+ const recent = await client.fetch('*[_type == "post" && publishedAt > "2024-01-01"]')
170
+ ```
171
+ rubrics:
172
+ - dimension: task-completion
173
+ rubricText: |
174
+ Score task completion from 0 to 100:
175
+ - 0: Couldn't attempt — missing critical information
176
+ - 20: Attempted but fundamentally wrong approach
177
+ - 50: Partial implementation — major functional gaps
178
+ - 80: Mostly complete — minor issues or missing edge cases
179
+ - 100: Fully functional code — works as expected
180
+
181
+ Must demonstrate:
182
+ - GROQ filter with _type == "post"
183
+ - Projection with aliased slug field ("slug": slug.current)
184
+ - Reference resolution with -> for author
185
+ - Ordering with | order(publishedAt desc)
186
+ - Slice/pagination syntax [0...10] or [0..9]
187
+ - Parameterized query with $slug for single post fetch
188
+ - Date filtering with dateTime() or string comparison
189
+ - Category filtering using references or array contains
190
+
191
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
192
+ humanScore: 30
193
+ notes:
194
+ "Only 3 of 8 criteria met (filter, parameterized slug, date filtering).
195
+ Missing: aliased slug, -> for author, ordering, pagination, category
196
+ filtering. Uses deprecated sanityClient import."
197
+ - dimension: code-correctness
198
+ rubricText: |
199
+ Score code correctness from 0 to 100:
200
+ - 0: Broken code, syntax errors, or deprecated APIs
201
+ - 30: Works but uses anti-patterns or inefficient approaches
202
+ - 50: Works but not idiomatic
203
+ - 80: Follows most best practices
204
+ - 100: Follows all best practices, idiomatic implementation
205
+
206
+ Check for:
207
+ - Valid GROQ syntax (proper filter brackets, projection braces)
208
+ - Uses @sanity/client createClient + client.fetch()
209
+ - Correct parameter passing syntax ($param)
210
+ - Proper reference dereference with ->
211
+ - No deprecated patterns
212
+
213
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
214
+ humanScore: 20
215
+ notes:
216
+ "Uses deprecated sanityClient() instead of createClient(). slug == $slug
217
+ is wrong (should be slug.current). Missing apiVersion. The bare {...}
218
+ projection is valid GROQ but selects every field instead of the needed ones."
219
+ - dimension: doc-coverage
220
+ rubricText: |
221
+ Score documentation coverage from 0 to 100:
222
+ - 0: Had to hallucinate/guess most implementation details
223
+ - 30: Significant gaps — filled with assumptions
224
+ - 50: Some gaps — inferred from partial information
225
+ - 80: Minor gaps — almost everything was documented
226
+ - 100: Complete coverage — all necessary info was in docs
227
+
228
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
229
+ humanScore: 25
230
+ notes:
231
+ "Appears to rely on outdated training data. Deprecated import, wrong
232
+ slug access pattern, no documented pagination or ordering patterns used."
233
+ gradedBy: "senior-engineer"
234
+ gradedAt: "2026-03-10"