@sanity/ailf 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (444)
  1. package/canonical/grader-references/README.md +2 -2
  2. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  3. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  4. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  5. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  6. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  7. package/config/features.ts +1 -1
  8. package/config/models.ts +28 -23
  9. package/config/sources.ts +1 -1
  10. package/config/thresholds.ts +1 -1
  11. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  13. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  17. package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
  18. package/dist/_vendor/ailf-core/config-helpers.js +29 -0
  19. package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
  20. package/dist/_vendor/ailf-core/examples/index.js +208 -114
  21. package/dist/_vendor/ailf-core/index.d.ts +1 -0
  22. package/dist/_vendor/ailf-core/index.js +1 -0
  23. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  25. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  27. package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
  28. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  29. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  30. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  31. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  32. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  33. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
  34. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
  35. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  36. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  37. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  38. package/dist/_vendor/ailf-core/services/index.js +1 -1
  39. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  40. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
  41. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  42. package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
  43. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
  44. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  45. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  46. package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
  47. package/dist/_vendor/ailf-tasks/cli.js +61 -0
  48. package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
  49. package/dist/_vendor/ailf-tasks/index.js +16 -0
  50. package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
  51. package/dist/_vendor/ailf-tasks/parser.js +73 -0
  52. package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
  53. package/dist/_vendor/ailf-tasks/schemas.js +180 -0
  54. package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
  55. package/dist/_vendor/ailf-tasks/validation.js +162 -0
  56. package/dist/adapters/api-client/remediation.js +2 -2
  57. package/dist/adapters/config-sources/file-config-adapter.js +6 -1
  58. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  59. package/dist/adapters/index.d.ts +0 -1
  60. package/dist/adapters/index.js +0 -1
  61. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  62. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  63. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  64. package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
  65. package/dist/adapters/task-sources/index.d.ts +1 -2
  66. package/dist/adapters/task-sources/index.js +1 -2
  67. package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
  68. package/dist/adapters/task-sources/repo-schemas.js +2 -2
  69. package/dist/adapters/task-sources/repo-task-source.js +1 -1
  70. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  71. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
  73. package/dist/adapters/task-sources/task-file-loader.js +20 -6
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/explain-handler.d.ts +1 -1
  95. package/dist/commands/explain-handler.js +37 -8
  96. package/dist/commands/fetch-docs.js +1 -0
  97. package/dist/commands/generate-configs.d.ts +3 -3
  98. package/dist/commands/generate-configs.js +20 -8
  99. package/dist/commands/init.d.ts +2 -3
  100. package/dist/commands/init.js +56 -170
  101. package/dist/commands/pipeline-action.d.ts +7 -1
  102. package/dist/commands/pipeline-action.js +43 -19
  103. package/dist/commands/pipeline.d.ts +6 -1
  104. package/dist/commands/pipeline.js +7 -2
  105. package/dist/commands/pr-comment.js +1 -0
  106. package/dist/commands/publish.js +1 -0
  107. package/dist/commands/shared/help.js +2 -2
  108. package/dist/commands/update-quality-scores.d.ts +5 -0
  109. package/dist/commands/update-quality-scores.js +20 -0
  110. package/dist/composition-root.d.ts +2 -3
  111. package/dist/composition-root.js +27 -14
  112. package/dist/config/features.ts +23 -0
  113. package/dist/config/models.ts +100 -0
  114. package/dist/config/prompts.ts +16 -0
  115. package/dist/config/rubrics.ts +225 -0
  116. package/dist/config/schedules.ts +47 -0
  117. package/dist/config/sinks.ts +37 -0
  118. package/dist/config/sources.ts +21 -0
  119. package/dist/config/thresholds.ts +61 -0
  120. package/dist/lib/agent-behavior-report.d.ts +8 -0
  121. package/dist/lib/agent-behavior-report.js +185 -0
  122. package/dist/lib/baseline.d.ts +19 -0
  123. package/dist/lib/baseline.js +153 -0
  124. package/dist/lib/calculate-scores.d.ts +23 -0
  125. package/dist/lib/calculate-scores.js +42 -0
  126. package/dist/lib/compare.d.ts +18 -0
  127. package/dist/lib/compare.js +170 -0
  128. package/dist/lib/coverage-audit.d.ts +4 -0
  129. package/dist/lib/coverage-audit.js +42 -0
  130. package/dist/lib/discovery-report.d.ts +13 -0
  131. package/dist/lib/discovery-report.js +57 -0
  132. package/dist/lib/fetch-docs.d.ts +30 -0
  133. package/dist/lib/fetch-docs.js +171 -0
  134. package/dist/lib/generate-configs.d.ts +25 -0
  135. package/dist/lib/generate-configs.js +42 -0
  136. package/dist/lib/grader-api.d.ts +21 -0
  137. package/dist/lib/grader-api.js +34 -0
  138. package/dist/lib/grader-compare.d.ts +19 -0
  139. package/dist/lib/grader-compare.js +91 -0
  140. package/dist/lib/grader-consistency.d.ts +27 -0
  141. package/dist/lib/grader-consistency.js +79 -0
  142. package/dist/lib/grader-sensitivity.d.ts +19 -0
  143. package/dist/lib/grader-sensitivity.js +75 -0
  144. package/dist/lib/grader-validate.d.ts +19 -0
  145. package/dist/lib/grader-validate.js +78 -0
  146. package/dist/lib/measure-retrieval.d.ts +14 -0
  147. package/dist/lib/measure-retrieval.js +71 -0
  148. package/dist/lib/pr-comment.d.ts +16 -0
  149. package/dist/lib/pr-comment.js +28 -0
  150. package/dist/lib/readiness-report.d.ts +13 -0
  151. package/dist/lib/readiness-report.js +108 -0
  152. package/dist/lib/webhook-server.d.ts +11 -0
  153. package/dist/lib/webhook-server.js +24 -0
  154. package/dist/lib/weekly-digest.d.ts +24 -0
  155. package/dist/lib/weekly-digest.js +148 -0
  156. package/dist/orchestration/build-app-context.js +13 -0
  157. package/dist/orchestration/cache-context.d.ts +23 -0
  158. package/dist/orchestration/cache-context.js +43 -0
  159. package/dist/orchestration/env-bridge.d.ts +21 -0
  160. package/dist/orchestration/env-bridge.js +66 -0
  161. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  162. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  163. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  164. package/dist/orchestration/step-runner.js +5 -1
  165. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  166. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  167. package/dist/orchestration/steps/callback-step.js +10 -1
  168. package/dist/orchestration/steps/compare-step.js +6 -3
  169. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  170. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  171. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  172. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  173. package/dist/orchestration/steps/fetch-docs-step.js +30 -16
  174. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  175. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  176. package/dist/orchestration/steps/generate-configs-step.js +50 -15
  177. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  178. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  179. package/dist/orchestration/steps/publish-report-step.js +19 -0
  180. package/dist/orchestration/steps/readiness-step.js +8 -3
  181. package/dist/orchestration/steps/report-step.js +17 -4
  182. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  183. package/dist/orchestration/steps/run-eval-step.js +52 -32
  184. package/dist/pipeline/agent-behavior-report.js +6 -0
  185. package/dist/pipeline/attribution.d.ts +1 -1
  186. package/dist/pipeline/attribution.js +1 -1
  187. package/dist/pipeline/cache.js +29 -15
  188. package/dist/pipeline/calculate-scores.d.ts +2 -0
  189. package/dist/pipeline/calculate-scores.js +70 -33
  190. package/dist/pipeline/checks.d.ts +8 -3
  191. package/dist/pipeline/checks.js +23 -3
  192. package/dist/pipeline/chronic-failures.d.ts +55 -0
  193. package/dist/pipeline/chronic-failures.js +110 -0
  194. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
  195. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  196. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  197. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  198. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  199. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  200. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  201. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  202. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  203. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  204. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  205. package/dist/pipeline/compiler/config-loader.js +42 -2
  206. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  207. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  208. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  209. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  210. package/dist/pipeline/compiler/index.d.ts +2 -5
  211. package/dist/pipeline/compiler/index.js +2 -5
  212. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  213. package/dist/pipeline/compiler/literacy-bridge.js +1 -1
  214. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
  215. package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
  216. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
  217. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
  218. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
  219. package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
  220. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
  221. package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
  222. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
  223. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
  224. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
  225. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
  226. package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
  227. package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
  228. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
  229. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
  230. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
  231. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
  232. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
  233. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
  234. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
  235. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
  241. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
  242. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  251. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  252. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
  253. package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
  254. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  255. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  256. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  257. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  258. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  259. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  260. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  261. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  262. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  263. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  264. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  265. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  266. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  267. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  268. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  269. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  270. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  271. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  272. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  273. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  274. package/dist/pipeline/compiler/task-bridge.js +92 -0
  275. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  276. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  277. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  278. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  279. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  280. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  281. package/dist/pipeline/coverage-audit.d.ts +1 -1
  282. package/dist/pipeline/coverage-audit.js +1 -1
  283. package/dist/pipeline/degradations.d.ts +1 -1
  284. package/dist/pipeline/degradations.js +1 -1
  285. package/dist/pipeline/failure-modes.d.ts +1 -1
  286. package/dist/pipeline/failure-modes.js +13 -1
  287. package/dist/pipeline/gap-analysis.d.ts +1 -1
  288. package/dist/pipeline/gap-analysis.js +3 -1
  289. package/dist/pipeline/generate-configs.d.ts +2 -2
  290. package/dist/pipeline/generate-configs.js +15 -8
  291. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  292. package/dist/pipeline/grader-compare-runner.js +7 -1
  293. package/dist/pipeline/grader-comparison.d.ts +1 -1
  294. package/dist/pipeline/grader-comparison.js +1 -1
  295. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  296. package/dist/pipeline/grader-consistency-runner.js +7 -1
  297. package/dist/pipeline/grader-consistency.d.ts +1 -1
  298. package/dist/pipeline/grader-consistency.js +1 -1
  299. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  300. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  301. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  302. package/dist/pipeline/grader-sensitivity.js +1 -1
  303. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  304. package/dist/pipeline/grader-validate-runner.js +2 -2
  305. package/dist/pipeline/grader-validation.d.ts +1 -1
  306. package/dist/pipeline/grader-validation.js +1 -1
  307. package/dist/pipeline/map-request-to-config.js +15 -2
  308. package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
  309. package/dist/pipeline/mirror-repo-tasks.js +1 -1
  310. package/dist/pipeline/plan-format.d.ts +1 -1
  311. package/dist/pipeline/plan-format.js +1 -1
  312. package/dist/pipeline/plan.d.ts +1 -1
  313. package/dist/pipeline/plan.js +67 -29
  314. package/dist/pipeline/probe.d.ts +1 -1
  315. package/dist/pipeline/probe.js +1 -1
  316. package/dist/pipeline/readiness-report.d.ts +2 -2
  317. package/dist/pipeline/readiness-report.js +2 -2
  318. package/dist/pipeline/release-classification.d.ts +1 -1
  319. package/dist/pipeline/release-classification.js +1 -1
  320. package/dist/pipeline/release-report.d.ts +1 -1
  321. package/dist/pipeline/release-report.js +1 -1
  322. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  323. package/dist/pipeline/repo-eval-comment.js +1 -1
  324. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  325. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  326. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  327. package/dist/pipeline/resolve-mappings.js +44 -44
  328. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  329. package/dist/pipeline/retrieval-metrics.js +28 -20
  330. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  331. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  332. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  333. package/dist/pipeline/steps/compare-step.js +90 -0
  334. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  335. package/dist/pipeline/steps/eval-step.js +347 -0
  336. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  337. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  338. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  339. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  340. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  341. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  342. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  343. package/dist/pipeline/steps/publish-report-step.js +243 -0
  344. package/dist/pipeline/steps/report-step.d.ts +13 -0
  345. package/dist/pipeline/steps/report-step.js +56 -0
  346. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  347. package/dist/pipeline/steps/update-scores-step.js +42 -0
  348. package/dist/pipeline/targeted-loo.d.ts +1 -1
  349. package/dist/pipeline/targeted-loo.js +1 -1
  350. package/dist/pipeline/thresholds.d.ts +1 -1
  351. package/dist/pipeline/thresholds.js +1 -1
  352. package/dist/pipeline/validate.js +13 -0
  353. package/dist/report-store.d.ts +17 -0
  354. package/dist/report-store.js +24 -0
  355. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  356. package/dist/scripts/agent-behavior-report.js +315 -0
  357. package/dist/scripts/baseline.d.ts +43 -0
  358. package/dist/scripts/baseline.js +267 -0
  359. package/dist/scripts/calculate-scores.d.ts +166 -0
  360. package/dist/scripts/calculate-scores.js +1296 -0
  361. package/dist/scripts/compare.d.ts +22 -0
  362. package/dist/scripts/compare.js +334 -0
  363. package/dist/scripts/coverage-audit.d.ts +44 -0
  364. package/dist/scripts/coverage-audit.js +209 -0
  365. package/dist/scripts/debug-eval.d.ts +19 -0
  366. package/dist/scripts/debug-eval.js +73 -0
  367. package/dist/scripts/discovery-report.d.ts +58 -0
  368. package/dist/scripts/discovery-report.js +250 -0
  369. package/dist/scripts/fetch-docs.d.ts +35 -0
  370. package/dist/scripts/fetch-docs.js +472 -0
  371. package/dist/scripts/generate-configs.d.ts +66 -0
  372. package/dist/scripts/generate-configs.js +459 -0
  373. package/dist/scripts/grader-api.d.ts +27 -0
  374. package/dist/scripts/grader-api.js +206 -0
  375. package/dist/scripts/grader-compare.d.ts +22 -0
  376. package/dist/scripts/grader-compare.js +368 -0
  377. package/dist/scripts/grader-consistency.d.ts +20 -0
  378. package/dist/scripts/grader-consistency.js +313 -0
  379. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  380. package/dist/scripts/grader-sensitivity.js +354 -0
  381. package/dist/scripts/grader-validate.d.ts +19 -0
  382. package/dist/scripts/grader-validate.js +267 -0
  383. package/dist/scripts/measure-retrieval.d.ts +10 -0
  384. package/dist/scripts/measure-retrieval.js +145 -0
  385. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  386. package/dist/scripts/migrate-task-mode.js +1 -1
  387. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  388. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  389. package/dist/scripts/pipeline.d.ts +76 -0
  390. package/dist/scripts/pipeline.js +1031 -0
  391. package/dist/scripts/pr-comment.d.ts +10 -0
  392. package/dist/scripts/pr-comment.js +510 -0
  393. package/dist/scripts/readiness-report.d.ts +88 -0
  394. package/dist/scripts/readiness-report.js +342 -0
  395. package/dist/scripts/update-quality-scores.d.ts +15 -0
  396. package/dist/scripts/update-quality-scores.js +184 -0
  397. package/dist/scripts/validate-task-sources.d.ts +1 -1
  398. package/dist/scripts/validate-task-sources.js +1 -1
  399. package/dist/scripts/validate.d.ts +13 -0
  400. package/dist/scripts/validate.js +79 -0
  401. package/dist/scripts/webhook-server.d.ts +26 -0
  402. package/dist/scripts/webhook-server.js +147 -0
  403. package/dist/scripts/weekly-digest.d.ts +24 -0
  404. package/dist/scripts/weekly-digest.js +144 -0
  405. package/dist/sinks/format-slack.d.ts +64 -0
  406. package/dist/sinks/format-slack.js +306 -0
  407. package/dist/sinks/slack-sink.d.ts +27 -0
  408. package/dist/sinks/slack-sink.js +78 -0
  409. package/dist/sinks/types.d.ts +1 -1
  410. package/dist/sinks/types.js +1 -1
  411. package/dist/sinks/webhook-sink.d.ts +19 -0
  412. package/dist/sinks/webhook-sink.js +50 -0
  413. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  414. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  415. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  416. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  417. package/dist/tasks/literacy/functions.task.ts +70 -0
  418. package/dist/tasks/literacy/groq.task.ts +259 -0
  419. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  420. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  421. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  422. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  423. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  424. package/package.json +25 -25
  425. package/tasks/.expanded.agentic.yaml +280 -0
  426. package/tasks/.expanded.yaml +565 -0
  427. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  428. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  429. package/tasks/literacy/content-lake.task.ts +181 -0
  430. package/tasks/literacy/frameworks.task.ts +1 -0
  431. package/tasks/literacy/functions.task.ts +1 -0
  432. package/tasks/literacy/groq.task.ts +1 -0
  433. package/tasks/literacy/image-handling.task.ts +95 -0
  434. package/tasks/literacy/nextjs-live.task.ts +2 -1
  435. package/tasks/literacy/portable-text.task.ts +169 -0
  436. package/tasks/literacy/studio-setup.task.ts +5 -2
  437. package/tasks/literacy/visual-editing.task.ts +1 -0
  438. package/LICENSE +0 -21
  439. package/tasks/frameworks.yaml +0 -98
  440. package/tasks/functions.yaml +0 -51
  441. package/tasks/groq.yaml +0 -216
  442. package/tasks/nextjs-live.yaml +0 -62
  443. package/tasks/studio-setup.yaml +0 -111
  444. package/tasks/visual-editing.yaml +0 -120
@@ -0,0 +1,134 @@
1
+ import { defineTask } from "../../_vendor/ailf-core/index.js"
2
+
3
+ // See content-lake.task.ts for detailed explanations of common task properties
4
+ export default [
5
+ defineTask({
6
+ id: "studio-blog-schema",
7
+ mode: "literacy",
8
+ title: "Studio Setup - Blog schema with posts, authors, categories",
9
+ description: "Studio Setup - Blog schema with posts, authors, categories",
10
+ area: "studio-setup",
11
+ docCoverage: true,
12
+ context: {
13
+ docs: [
14
+ {
15
+ slug: "schemas-and-forms",
16
+ reason: "High-level overview of schemas and the form builder",
17
+ },
18
+ {
19
+ slug: "introduction-to-schemas",
20
+ reason: "Introduces schema concepts, defineType/defineField",
21
+ },
22
+ {
23
+ slug: "config-api-reference",
24
+ reason:
25
+ "Configuration API — defineConfig, plugins, schema registration",
26
+ },
27
+ {
28
+ slug: "reference-type",
29
+ reason: "Reference field type for author/category relationships",
30
+ },
31
+ ],
32
+ },
33
+ referenceSolution: "reference-solutions/studio-setup/blog-schema.ts",
34
+ prompt: {
35
+ vars: {
36
+ task: `Set up a new Sanity Studio with a custom schema for a blog:
37
+
38
+ 1. Create document types for: posts, authors, categories
39
+ 2. Posts should have: title, slug, body (portable text), author reference, categories array
40
+ 3. Authors should have: name, bio, image
41
+ 4. Categories should have: title, description
42
+
43
+ Include the schema definitions and sanity.config.ts setup.`,
44
+ docs: "file://contexts/canonical/studio-blog-schema.md",
45
+ },
46
+ },
47
+ assertions: [
48
+ {
49
+ type: "llm-rubric",
50
+ template: "task-completion",
51
+ criteria: [
52
+ "Three document types (post, author, category)",
53
+ "Post with all required fields including portable text body",
54
+ "Reference from post to author",
55
+ "Array of references from post to categories",
56
+ "sanity.config.ts with schema registration",
57
+ ],
58
+ },
59
+ {
60
+ type: "llm-rubric",
61
+ template: "code-correctness",
62
+ criteria: [
63
+ "Uses defineConfig, defineType, defineField (v3 syntax)",
64
+ "Does NOT use createSchema (deprecated v2)",
65
+ "Proper reference syntax with 'to' array",
66
+ "Correct portable text array definition",
67
+ ],
68
+ },
69
+ { type: "contains", value: "defineConfig", weight: 1 },
70
+ { type: "contains", value: "defineType", weight: 1 },
71
+ { type: "contains", value: "defineField", weight: 1 },
72
+ // not-contains: FAILS the assertion if the string appears — penalizes deprecated v2 schema API
73
+ { type: "not-contains", value: "createSchema", weight: 1 },
74
+ { type: "not-contains", value: "import Schema from", weight: 1 },
75
+ ],
76
+ }),
77
+
78
+ defineTask({
79
+ id: "studio-custom-tool",
80
+ mode: "literacy",
81
+ title: "Studio Setup - Custom tool in sidebar",
82
+ description: "Studio Setup - Custom tool in sidebar",
83
+ area: "studio-setup",
84
+ context: {
85
+ docs: [
86
+ {
87
+ slug: "studio-tools",
88
+ reason: "Overview of Studio tools system",
89
+ },
90
+ {
91
+ slug: "tool-api-reference",
92
+ reason: "Tool API — name, title, icon, component properties",
93
+ },
94
+ {
95
+ slug: "custom-studio-tool",
96
+ reason: "Step-by-step guide for creating a custom tool",
97
+ },
98
+ ],
99
+ },
100
+ referenceSolution: "reference-solutions/studio-setup/custom-tool.tsx",
101
+ prompt: {
102
+ vars: {
103
+ task: `Add a custom tool to the Sanity Studio sidebar that displays
104
+ a dashboard. The tool should:
105
+
106
+ 1. Appear in the studio navigation with a custom icon
107
+ 2. Have a title and name
108
+ 3. Render a React component showing a "Dashboard" heading
109
+
110
+ Provide the tool definition and sanity.config.ts registration.`,
111
+ docs: "file://contexts/canonical/studio-custom-tool.md",
112
+ },
113
+ },
114
+ assertions: [
115
+ {
116
+ type: "llm-rubric",
117
+ template: "task-completion",
118
+ criteria: [
119
+ "Tool object with name, title, icon, component",
120
+ "React component for the tool UI",
121
+ "Registration in sanity.config.ts tools array",
122
+ ],
123
+ },
124
+ { type: "contains", value: "tools", weight: 1 },
125
+ // javascript: custom JS assertion — `output` is the model's response string; must return boolean
126
+ {
127
+ type: "javascript",
128
+ value: `return output.includes('name:') &&
129
+ output.includes('component') &&
130
+ (output.includes('icon:') || output.includes('Icon'))`,
131
+ },
132
+ ],
133
+ }),
134
+ ]
@@ -0,0 +1,147 @@
1
+ import { defineTask } from "../../_vendor/ailf-core/index.js"
2
+
3
+ // See content-lake.task.ts for detailed explanations of common task properties
4
+ export default [
5
+ defineTask({
6
+ id: "visual-editing-presentation",
7
+ mode: "literacy",
8
+ title: "Visual Editing - Presentation tool with click-to-edit",
9
+ description: "Visual Editing - Presentation tool with click-to-edit",
10
+ area: "visual-editing",
11
+ docCoverage: true,
12
+ context: {
13
+ docs: [
14
+ {
15
+ slug: "configuring-the-presentation-tool",
16
+ reason: "Core presentationTool configuration and setup",
17
+ },
18
+ {
19
+ slug: "introduction-to-visual-editing",
20
+ reason: "Visual Editing concepts — stega, overlays, data attributes",
21
+ },
22
+ {
23
+ slug: "visual-editing-with-next-js-app-router",
24
+ reason: "Next.js App Router-specific visual editing guide",
25
+ },
26
+ {
27
+ slug: "stega",
28
+ reason: "Stega encoding for click-to-edit data attributes",
29
+ },
30
+ ],
31
+ },
32
+ referenceSolution:
33
+ "reference-solutions/visual-editing/presentation-nextjs.tsx",
34
+ prompt: {
35
+ vars: {
36
+ task: `Set up the Presentation tool with a Next.js 14 (App Router) frontend
37
+ and implement click-to-edit functionality:
38
+
39
+ 1. Configure the Presentation tool in sanity.config.ts
40
+ 2. Set up the Next.js app to work with Visual Editing
41
+ 3. Implement data attributes so clicking content in the preview
42
+ opens the corresponding field in Studio
43
+
44
+ Provide all necessary code for both Studio and Next.js sides.`,
45
+ docs: "file://contexts/canonical/visual-editing-presentation.md",
46
+ },
47
+ },
48
+ assertions: [
49
+ {
50
+ type: "llm-rubric",
51
+ template: "task-completion",
52
+ criteria: [
53
+ "presentationTool configured in sanity.config.ts",
54
+ "previewUrl or equivalent configured",
55
+ "Data attributes for click-to-edit (createDataAttribute or stega)",
56
+ "Next.js App Router patterns used correctly",
57
+ ],
58
+ },
59
+ {
60
+ type: "llm-rubric",
61
+ template: "code-correctness",
62
+ criteria: [
63
+ "Uses @sanity/presentation (not deprecated packages)",
64
+ "Uses createDataAttribute or stega encoding correctly",
65
+ "Proper Next.js App Router patterns (not Pages Router)",
66
+ "No mixing of deprecated and current APIs",
67
+ ],
68
+ },
69
+ { type: "contains", value: "presentationTool", weight: 1 },
70
+ {
71
+ type: "contains-any",
72
+ value: [
73
+ "createDataAttribute",
74
+ "data-sanity",
75
+ "encodeDataAttribute",
76
+ "stega",
77
+ ],
78
+ weight: 1,
79
+ },
80
+ { type: "not-contains", value: "@sanity/preview-kit", weight: 1 },
81
+ ],
82
+ }),
83
+
84
+ defineTask({
85
+ id: "visual-editing-live-preview",
86
+ mode: "literacy",
87
+ title: "Visual Editing - Live preview with draft content",
88
+ description: "Visual Editing - Live preview with draft content",
89
+ area: "visual-editing",
90
+ context: {
91
+ docs: [
92
+ {
93
+ slug: "live-content-api",
94
+ reason: "Live Content API — defineLive, real-time subscriptions",
95
+ },
96
+ {
97
+ slug: "perspectives",
98
+ reason: "Draft vs published perspectives",
99
+ },
100
+ {
101
+ slug: "fetching-content-for-visual-editing",
102
+ reason: "Data fetching patterns for visual editing contexts",
103
+ },
104
+ ],
105
+ },
106
+ referenceSolution: "reference-solutions/visual-editing/live-preview.tsx",
107
+ prompt: {
108
+ vars: {
109
+ task: `Implement live preview in a Next.js app that shows draft content
110
+ from Sanity in real-time as editors make changes in the Studio.
111
+
112
+ Requirements:
113
+ - Use the Live Content API approach
114
+ - Handle draft vs published perspectives correctly
115
+ - Show real-time updates without page refresh
116
+
117
+ Provide a complete implementation.`,
118
+ docs: "file://contexts/canonical/visual-editing-live-preview.md",
119
+ },
120
+ },
121
+ assertions: [
122
+ {
123
+ type: "llm-rubric",
124
+ template: "task-completion",
125
+ criteria: [
126
+ "Live Content API usage (defineLive, useLiveQuery, or sanityFetch with live option)",
127
+ "Draft perspective configuration",
128
+ "Real-time subscription/update mechanism",
129
+ ],
130
+ },
131
+ {
132
+ type: "llm-rubric",
133
+ template: "code-correctness",
134
+ criteria: [
135
+ "Modern API usage (not deprecated preview packages)",
136
+ "Proper perspective handling",
137
+ "Correct subscription lifecycle management",
138
+ ],
139
+ },
140
+ {
141
+ type: "contains-any",
142
+ value: ["useLiveQuery", "defineLive", "live:", "perspective"],
143
+ weight: 1,
144
+ },
145
+ ],
146
+ }),
147
+ ]
package/package.json CHANGED
@@ -1,9 +1,9 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "2.0.0",
3
+ "version": "2.0.2",
4
4
  "private": false,
5
5
  "publishConfig": {
6
- "access": "restricted"
6
+ "access": "public"
7
7
  },
8
8
  "license": "MIT",
9
9
  "repository": {
@@ -31,28 +31,6 @@
31
31
  "canonical",
32
32
  "tasks"
33
33
  ],
34
- "dependencies": {
35
- "@google-cloud/bigquery": "^8.1.1",
36
- "@inquirer/prompts": "^8.3.0",
37
- "@modelcontextprotocol/sdk": "^1.29.0",
38
- "@portabletext/markdown": "^1.0.0",
39
- "@sanity/client": "^7.3.0",
40
- "commander": "^14.0.3",
41
- "dotenv": "^16.4.7",
42
- "dotenv-cli": "^11.0.0",
43
- "jiti": "^2.6.1",
44
- "js-yaml": "^4.1.0",
45
- "promptfoo": "^0.120.24",
46
- "zod": "^4.3.6"
47
- },
48
- "devDependencies": {
49
- "@types/js-yaml": "^4.0.9",
50
- "@types/node": "^22.13.1",
51
- "tsx": "^4.19.2",
52
- "typescript": "^5.7.3",
53
- "@sanity/ailf-shared": "0.1.0",
54
- "@sanity/ailf-core": "0.1.0"
55
- },
56
34
  "scripts": {
57
35
  "build": "tsc && tsx scripts/bundle-workspace-deps.ts",
58
36
  "generate-configs": "tsx src/cli.ts generate-configs",
@@ -80,5 +58,27 @@
80
58
  "discovery-report": "tsx src/cli.ts discovery-report",
81
59
  "webhook-server": "tsx src/cli.ts webhook-server",
82
60
  "weekly-digest": "tsx src/cli.ts weekly-digest"
61
+ },
62
+ "dependencies": {
63
+ "@google-cloud/bigquery": "^8.1.1",
64
+ "@inquirer/prompts": "^8.3.0",
65
+ "@modelcontextprotocol/sdk": "^1.29.0",
66
+ "@portabletext/markdown": "^1.0.0",
67
+ "@sanity/client": "^7.3.0",
68
+ "commander": "^14.0.3",
69
+ "dotenv": "^16.4.7",
70
+ "dotenv-cli": "^11.0.0",
71
+ "jiti": "^2.6.1",
72
+ "js-yaml": "^4.1.0",
73
+ "promptfoo": "^0.120.24",
74
+ "zod": "^4.3.6"
75
+ },
76
+ "devDependencies": {
77
+ "@sanity/ailf-core": "workspace:*",
78
+ "@sanity/ailf-shared": "workspace:*",
79
+ "@types/js-yaml": "^4.0.9",
80
+ "@types/node": "^22.13.1",
81
+ "tsx": "^4.19.2",
82
+ "typescript": "^5.7.3"
83
83
  }
84
- }
84
+ }
@@ -0,0 +1,280 @@
1
+ # .expanded.agentic.yaml
2
+ #
3
+ # AUTO-GENERATED by compiler pipeline — do not edit directly.
4
+ # Gold entries only (no baseline) for agentic evaluation mode.
5
+ # Run: npx @sanity/ailf generate-configs
6
+
7
+ - description: GROQ - Blog queries with filtering and pagination (gold)
8
+ vars:
9
+ task: |-
10
+ Write GROQ queries for a Sanity blog application:
11
+
12
+ 1. Fetch all published blog posts ordered by publishedAt descending,
13
+ with a projection that includes: _id, title, slug (from slug.current),
14
+ publishedAt, excerpt, and the author's name (resolved from a reference)
15
+ 2. Add pagination to return only the first 10 results
16
+ 3. Fetch a single post by its slug parameter, including the full body
17
+ content and resolved author and category references
18
+ 4. Fetch posts published after a specific date
19
+ 5. Fetch posts that belong to a specific category (where categories
20
+ is an array of references)
21
+
22
+ Use @sanity/client with client.fetch() for all queries. Include
23
+ TypeScript types for the query results.
24
+ docs: file://contexts/canonical/groq-blog-queries.md
25
+ __featureArea: groq
26
+ assert:
27
+ - type: llm-rubric
28
+ value: |-
29
+ Score task completion from 0 to 100:
30
+ - 0: Couldn't attempt — missing critical information
31
+ - 20: Attempted but fundamentally wrong approach
32
+ - 50: Partial implementation — major functional gaps
33
+ - 80: Mostly complete — minor issues or missing edge cases
34
+ - 100: Fully functional code — works as expected
35
+
36
+ Must demonstrate:
37
+ - GROQ filter with _type == "post"
38
+ - Projection with aliased slug field ("slug": slug.current)
39
+ - Reference resolution with -> for author
40
+ - Ordering with | order(publishedAt desc)
41
+ - Slice/pagination syntax [0...10] or [0..9]
42
+ - Parameterized query with $slug for single post fetch
43
+ - Date filtering with dateTime() or string comparison
44
+ - Category filtering using references or array contains
45
+
46
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
47
+ provider: anthropic:messages:claude-opus-4-5-20251101
48
+ metadata:
49
+ dimension: task-completion
50
+ maxScore: 100
51
+ - type: llm-rubric
52
+ value: |-
53
+ Score code correctness from 0 to 100:
54
+ - 0: Broken code, syntax errors, or deprecated APIs
55
+ - 30: Works but uses anti-patterns or inefficient approaches
56
+ - 50: Works but not idiomatic
57
+ - 80: Follows most best practices
58
+ - 100: Follows all best practices, idiomatic implementation
59
+
60
+ Check for:
61
+ - Valid GROQ syntax (proper filter brackets, projection braces)
62
+ - Uses @sanity/client createClient + client.fetch()
63
+ - Correct parameter passing syntax ($param)
64
+ - Proper reference dereference with ->
65
+ - No deprecated patterns
66
+
67
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
68
+ provider: anthropic:messages:claude-opus-4-5-20251101
69
+ metadata:
70
+ dimension: code-correctness
71
+ maxScore: 100
72
+ - type: contains-any
73
+ value:
74
+ - client.fetch
75
+ - createClient
76
+ weight: 1
77
+ - type: contains-any
78
+ value:
79
+ - order(publishedAt
80
+ - order(_createdAt
81
+ - '| order('
82
+ weight: 1
83
+ - type: contains-any
84
+ value:
85
+ - '[0...10]'
86
+ - '[0..9]'
87
+ - '[0...'
88
+ weight: 1
89
+ - type: llm-rubric
90
+ value: |-
91
+ Score documentation coverage from 0 to 100:
92
+ - 0: Had to hallucinate/guess most implementation details
93
+ - 30: Significant gaps — filled with assumptions
94
+ - 50: Some gaps — inferred from partial information
95
+ - 80: Minor gaps — almost everything was documented
96
+ - 100: Complete coverage — all necessary info was in docs
97
+
98
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
99
+ provider: anthropic:messages:claude-opus-4-5-20251101
100
+ metadata:
101
+ dimension: doc-coverage
102
+ maxScore: 100
103
+ - description: GROQ - Joins and reference resolution (gold)
104
+ vars:
105
+ task: |-
106
+ Write GROQ queries that demonstrate join patterns in Sanity:
107
+
108
+ 1. Follow a single reference to resolve an author's full profile
109
+ from a post (post.author -> author document with name, bio, image)
110
+ 2. Resolve an array of category references from a post
111
+ (post.categories[]-> with title and slug)
112
+ 3. Write a reverse reference query: given an author's ID, find all
113
+ posts by that author using a subquery and the parent scope operator (^)
114
+ 4. Create a nested join: for each author, include their 5 most recent
115
+ posts as a nested array
116
+ 5. Use the references() function to find all documents that reference
117
+ a specific document ID
118
+
119
+ Use @sanity/client with client.fetch(). Include TypeScript types.
120
+ docs: file://contexts/canonical/groq-joins-references.md
121
+ __featureArea: groq
122
+ assert:
123
+ - type: llm-rubric
124
+ value: |-
125
+ Score task completion from 0 to 100:
126
+ - 0: Couldn't attempt — missing critical information
127
+ - 20: Attempted but fundamentally wrong approach
128
+ - 50: Partial implementation — major functional gaps
129
+ - 80: Mostly complete — minor issues or missing edge cases
130
+ - 100: Fully functional code — works as expected
131
+
132
+ Must demonstrate:
133
+ - Single reference follow with -> operator
134
+ - Array reference resolution with []->
135
+ - Reverse reference / subquery using *[references(^._id)]
136
+ - Nested join pattern with parent scope (^)
137
+ - The references() function
138
+
139
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
140
+ provider: anthropic:messages:claude-opus-4-5-20251101
141
+ metadata:
142
+ dimension: task-completion
143
+ maxScore: 100
144
+ - type: llm-rubric
145
+ value: |-
146
+ Score code correctness from 0 to 100:
147
+ - 0: Broken code, syntax errors, or deprecated APIs
148
+ - 30: Works but uses anti-patterns or inefficient approaches
149
+ - 50: Works but not idiomatic
150
+ - 80: Follows most best practices
151
+ - 100: Follows all best practices, idiomatic implementation
152
+
153
+ Check for:
154
+ - Correct -> dereference syntax
155
+ - Valid []-> array dereference
156
+ - Proper use of ^ parent scope operator
157
+ - Valid references() function usage
158
+ - No made-up syntax
159
+
160
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
161
+ provider: anthropic:messages:claude-opus-4-5-20251101
162
+ metadata:
163
+ dimension: code-correctness
164
+ maxScore: 100
165
+ - type: contains
166
+ value: '->'
167
+ weight: 1
168
+ - type: contains-any
169
+ value:
170
+ - references(
171
+ - references(^
172
+ weight: 1
173
+ - type: llm-rubric
174
+ value: |-
175
+ Score documentation coverage from 0 to 100:
176
+ - 0: Had to hallucinate/guess most implementation details
177
+ - 30: Significant gaps — filled with assumptions
178
+ - 50: Some gaps — inferred from partial information
179
+ - 80: Minor gaps — almost everything was documented
180
+ - 100: Complete coverage — all necessary info was in docs
181
+
182
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
183
+ provider: anthropic:messages:claude-opus-4-5-20251101
184
+ metadata:
185
+ dimension: doc-coverage
186
+ maxScore: 100
187
+ - description: GROQ - Advanced filtering and projections (gold)
188
+ vars:
189
+ task: |-
190
+ Write GROQ queries demonstrating advanced filtering and projection patterns:
191
+
192
+ 1. Use select() for conditional projections — return different fields
193
+ based on the document's _type (e.g., posts get excerpt, events get
194
+ date and venue)
195
+ 2. Use coalesce() for fallback values — e.g., use seoTitle if it
196
+ exists, otherwise fall back to title
197
+ 3. Use the match operator for full-text search in titles
198
+ 4. Use count() to count documents matching a filter and to count
199
+ items within an array field
200
+ 5. Use defined() to filter for documents that have a specific field set
201
+ 6. Filter items within an array using [condition] syntax
202
+ 7. Order results by multiple fields (e.g., featured status first,
203
+ then by publishedAt)
204
+
205
+ Use @sanity/client with client.fetch(). Include TypeScript types.
206
+ docs: file://contexts/canonical/groq-advanced-filtering.md
207
+ __featureArea: groq
208
+ assert:
209
+ - type: llm-rubric
210
+ value: |-
211
+ Score task completion from 0 to 100:
212
+ - 0: Couldn't attempt — missing critical information
213
+ - 20: Attempted but fundamentally wrong approach
214
+ - 50: Partial implementation — major functional gaps
215
+ - 80: Mostly complete — minor issues or missing edge cases
216
+ - 100: Fully functional code — works as expected
217
+
218
+ Must demonstrate:
219
+ - select() for conditional projections
220
+ - coalesce() for fallback values
221
+ - match operator for text search
222
+ - count() function usage
223
+ - defined() function for existence checks
224
+ - Array filtering with [condition]
225
+ - Multi-field ordering
226
+
227
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
228
+ provider: anthropic:messages:claude-opus-4-5-20251101
229
+ metadata:
230
+ dimension: task-completion
231
+ maxScore: 100
232
+ - type: llm-rubric
233
+ value: |-
234
+ Score code correctness from 0 to 100:
235
+ - 0: Broken code, syntax errors, or deprecated APIs
236
+ - 30: Works but uses anti-patterns or inefficient approaches
237
+ - 50: Works but not idiomatic
238
+ - 80: Follows most best practices
239
+ - 100: Follows all best practices, idiomatic implementation
240
+
241
+ Check for:
242
+ - Valid select() syntax with => arrow notation
243
+ - Correct coalesce() usage
244
+ - Proper match operator usage (on text fields)
245
+ - Valid count() and defined() function calls
246
+ - Correct array filter syntax
247
+
248
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
249
+ provider: anthropic:messages:claude-opus-4-5-20251101
250
+ metadata:
251
+ dimension: code-correctness
252
+ maxScore: 100
253
+ - type: contains-any
254
+ value:
255
+ - select(
256
+ - coalesce(
257
+ weight: 1
258
+ - type: contains-any
259
+ value:
260
+ - count(
261
+ - defined(
262
+ weight: 1
263
+ - type: contains-any
264
+ value:
265
+ - match
266
+ weight: 1
267
+ - type: llm-rubric
268
+ value: |-
269
+ Score documentation coverage from 0 to 100:
270
+ - 0: Had to hallucinate/guess most implementation details
271
+ - 30: Significant gaps — filled with assumptions
272
+ - 50: Some gaps — inferred from partial information
273
+ - 80: Minor gaps — almost everything was documented
274
+ - 100: Complete coverage — all necessary info was in docs
275
+
276
+ Return ONLY a JSON object: {"score": <number>, "reason": "<explanation>"}
277
+ provider: anthropic:messages:claude-opus-4-5-20251101
278
+ metadata:
279
+ dimension: doc-coverage
280
+ maxScore: 100