@sanity/ailf 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (499) hide show
  1. package/README.md +0 -1
  2. package/canonical/grader-references/README.md +2 -2
  3. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  4. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  5. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  6. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  7. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  8. package/config/features.ts +1 -1
  9. package/config/models.ts +29 -12
  10. package/config/sources.ts +1 -1
  11. package/config/thresholds.ts +1 -1
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  13. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  17. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  18. package/dist/_vendor/ailf-core/config-helpers.d.ts +20 -17
  19. package/dist/_vendor/ailf-core/config-helpers.js +51 -2
  20. package/dist/_vendor/ailf-core/examples/index.d.ts +166 -80
  21. package/dist/_vendor/ailf-core/examples/index.js +213 -94
  22. package/dist/_vendor/ailf-core/index.d.ts +3 -2
  23. package/dist/_vendor/ailf-core/index.js +2 -1
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  25. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  27. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  28. package/dist/_vendor/ailf-core/ports/context.d.ts +22 -1
  29. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  30. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  31. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  32. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  33. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  34. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +1 -0
  35. package/dist/_vendor/ailf-core/schemas/eval-config.js +10 -0
  36. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -1
  37. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +16 -2
  38. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +0 -2
  39. package/dist/_vendor/ailf-core/schemas/pipeline.js +0 -1
  40. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  41. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  42. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  43. package/dist/_vendor/ailf-core/services/index.js +1 -1
  44. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +25 -1
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  47. package/dist/_vendor/ailf-core/types/index.d.ts +48 -7
  48. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +105 -23
  49. package/dist/_vendor/ailf-core/types/plugin-registry.js +73 -20
  50. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  51. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  52. package/dist/adapters/api-client/remediation.js +2 -2
  53. package/dist/adapters/config-sources/file-config-adapter.js +7 -1
  54. package/dist/adapters/config-sources/ts-config-loader.js +21 -13
  55. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  56. package/dist/adapters/index.d.ts +0 -1
  57. package/dist/adapters/index.js +0 -1
  58. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  59. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  60. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  61. package/dist/adapters/task-sources/content-lake-task-source.js +21 -26
  62. package/dist/adapters/task-sources/index.d.ts +3 -4
  63. package/dist/adapters/task-sources/index.js +3 -4
  64. package/dist/adapters/task-sources/repo-schemas.d.ts +219 -17
  65. package/dist/adapters/task-sources/repo-schemas.js +228 -20
  66. package/dist/adapters/task-sources/repo-task-source.d.ts +14 -10
  67. package/dist/adapters/task-sources/repo-task-source.js +81 -122
  68. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  69. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  70. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  71. package/dist/adapters/task-sources/repo-validation.js +126 -5
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +10 -7
  73. package/dist/adapters/task-sources/task-file-loader.js +21 -7
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/coverage-audit.js +3 -1
  95. package/dist/commands/explain-handler.d.ts +1 -1
  96. package/dist/commands/explain-handler.js +37 -8
  97. package/dist/commands/fetch-docs.js +1 -0
  98. package/dist/commands/generate-configs.d.ts +3 -3
  99. package/dist/commands/generate-configs.js +20 -8
  100. package/dist/commands/init.d.ts +5 -4
  101. package/dist/commands/init.js +190 -25
  102. package/dist/commands/pipeline-action.d.ts +7 -1
  103. package/dist/commands/pipeline-action.js +43 -19
  104. package/dist/commands/pipeline.d.ts +6 -1
  105. package/dist/commands/pipeline.js +7 -2
  106. package/dist/commands/pr-comment.js +1 -0
  107. package/dist/commands/publish.js +1 -0
  108. package/dist/commands/shared/help.js +2 -2
  109. package/dist/commands/update-quality-scores.d.ts +5 -0
  110. package/dist/commands/update-quality-scores.js +20 -0
  111. package/dist/commands/validate-tasks.d.ts +2 -2
  112. package/dist/commands/validate-tasks.js +26 -15
  113. package/dist/composition-root.d.ts +15 -4
  114. package/dist/composition-root.js +100 -55
  115. package/dist/config/features.ts +23 -0
  116. package/dist/config/models.ts +100 -0
  117. package/dist/config/prompts.ts +16 -0
  118. package/dist/config/rubrics.ts +225 -0
  119. package/dist/config/schedules.ts +47 -0
  120. package/dist/config/sinks.ts +37 -0
  121. package/dist/config/sources.ts +21 -0
  122. package/dist/config/thresholds.ts +61 -0
  123. package/dist/index.d.ts +41 -0
  124. package/dist/index.js +48 -0
  125. package/dist/lib/agent-behavior-report.d.ts +8 -0
  126. package/dist/lib/agent-behavior-report.js +185 -0
  127. package/dist/lib/baseline.d.ts +19 -0
  128. package/dist/lib/baseline.js +153 -0
  129. package/dist/lib/calculate-scores.d.ts +23 -0
  130. package/dist/lib/calculate-scores.js +42 -0
  131. package/dist/lib/compare.d.ts +18 -0
  132. package/dist/lib/compare.js +170 -0
  133. package/dist/lib/coverage-audit.d.ts +4 -0
  134. package/dist/lib/coverage-audit.js +42 -0
  135. package/dist/lib/discovery-report.d.ts +13 -0
  136. package/dist/lib/discovery-report.js +57 -0
  137. package/dist/lib/fetch-docs.d.ts +30 -0
  138. package/dist/lib/fetch-docs.js +171 -0
  139. package/dist/lib/generate-configs.d.ts +25 -0
  140. package/dist/lib/generate-configs.js +42 -0
  141. package/dist/lib/grader-api.d.ts +21 -0
  142. package/dist/lib/grader-api.js +34 -0
  143. package/dist/lib/grader-compare.d.ts +19 -0
  144. package/dist/lib/grader-compare.js +91 -0
  145. package/dist/lib/grader-consistency.d.ts +27 -0
  146. package/dist/lib/grader-consistency.js +79 -0
  147. package/dist/lib/grader-sensitivity.d.ts +19 -0
  148. package/dist/lib/grader-sensitivity.js +75 -0
  149. package/dist/lib/grader-validate.d.ts +19 -0
  150. package/dist/lib/grader-validate.js +78 -0
  151. package/dist/lib/measure-retrieval.d.ts +14 -0
  152. package/dist/lib/measure-retrieval.js +71 -0
  153. package/dist/lib/pr-comment.d.ts +16 -0
  154. package/dist/lib/pr-comment.js +28 -0
  155. package/dist/lib/readiness-report.d.ts +13 -0
  156. package/dist/lib/readiness-report.js +108 -0
  157. package/dist/lib/webhook-server.d.ts +11 -0
  158. package/dist/lib/webhook-server.js +24 -0
  159. package/dist/lib/weekly-digest.d.ts +24 -0
  160. package/dist/lib/weekly-digest.js +148 -0
  161. package/dist/orchestration/build-app-context.js +13 -0
  162. package/dist/orchestration/build-step-sequence.js +4 -2
  163. package/dist/orchestration/cache-context.d.ts +23 -0
  164. package/dist/orchestration/cache-context.js +43 -0
  165. package/dist/orchestration/env-bridge.d.ts +21 -0
  166. package/dist/orchestration/env-bridge.js +66 -0
  167. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  168. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  169. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  170. package/dist/orchestration/step-runner.js +5 -1
  171. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  172. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  173. package/dist/orchestration/steps/callback-step.js +10 -1
  174. package/dist/orchestration/steps/compare-step.js +6 -3
  175. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  176. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  177. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  178. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  179. package/dist/orchestration/steps/fetch-docs-step.js +32 -19
  180. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  181. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  182. package/dist/orchestration/steps/generate-configs-step.js +77 -26
  183. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  184. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  185. package/dist/orchestration/steps/publish-report-step.js +19 -0
  186. package/dist/orchestration/steps/readiness-step.js +8 -3
  187. package/dist/orchestration/steps/report-step.js +17 -4
  188. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  189. package/dist/orchestration/steps/run-eval-step.js +51 -31
  190. package/dist/pipeline/agent-behavior-report.js +6 -0
  191. package/dist/pipeline/attribution.d.ts +1 -1
  192. package/dist/pipeline/attribution.js +1 -1
  193. package/dist/pipeline/cache.js +29 -15
  194. package/dist/pipeline/calculate-scores.d.ts +2 -0
  195. package/dist/pipeline/calculate-scores.js +70 -33
  196. package/dist/pipeline/chronic-failures.d.ts +55 -0
  197. package/dist/pipeline/chronic-failures.js +110 -0
  198. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +1 -1
  199. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +1 -1
  200. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +1 -1
  201. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +132 -62
  202. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  203. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +33 -100
  204. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  205. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  206. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  207. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  208. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  209. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  210. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  211. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  212. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  213. package/dist/pipeline/compiler/config-loader.js +42 -2
  214. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  215. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  216. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  217. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  218. package/dist/pipeline/compiler/index.d.ts +2 -5
  219. package/dist/pipeline/compiler/index.js +2 -5
  220. package/dist/pipeline/compiler/literacy-bridge.d.ts +2 -2
  221. package/dist/pipeline/compiler/literacy-bridge.js +2 -2
  222. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  223. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  224. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  225. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  226. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  227. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  228. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +23 -0
  229. package/dist/pipeline/compiler/mode-bases/literacy.js +132 -0
  230. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  231. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  232. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  233. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  234. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  235. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  236. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  237. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  238. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  239. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  240. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  241. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  242. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  245. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  246. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  247. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  248. package/dist/pipeline/compiler/mode-handlers/index.d.ts +6 -7
  249. package/dist/pipeline/compiler/mode-handlers/index.js +6 -8
  250. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  251. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  252. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  253. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  254. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  255. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  256. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  257. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  258. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  259. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  260. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  261. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  262. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  263. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  264. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  265. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  266. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  267. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  268. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  269. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  270. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  271. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  272. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  273. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  274. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +63 -6
  275. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  276. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  277. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  278. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  279. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  280. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  281. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  282. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  283. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  284. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +108 -0
  285. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  286. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  287. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  288. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  289. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +3 -1
  290. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +65 -67
  291. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  292. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +191 -0
  293. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  294. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +101 -0
  295. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  296. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  297. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +19 -0
  298. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +323 -0
  299. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +103 -0
  300. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  301. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  302. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  303. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  304. package/dist/pipeline/compiler/preset-loader.js +99 -0
  305. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +7 -10
  306. package/dist/pipeline/compiler/presets/sanity-literacy.js +11 -157
  307. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  308. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  309. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  310. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  311. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  312. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  313. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  314. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  315. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  316. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  317. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  318. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  319. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  320. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  321. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  322. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  323. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  324. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  325. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  326. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  327. package/dist/pipeline/compiler/task-bridge.js +92 -0
  328. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  329. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  330. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  331. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  332. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  333. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  334. package/dist/pipeline/coverage-audit.d.ts +1 -1
  335. package/dist/pipeline/coverage-audit.js +1 -1
  336. package/dist/pipeline/degradations.d.ts +1 -1
  337. package/dist/pipeline/degradations.js +1 -1
  338. package/dist/pipeline/expand-tasks.d.ts +2 -2
  339. package/dist/pipeline/expand-tasks.js +2 -2
  340. package/dist/pipeline/failure-modes.d.ts +1 -1
  341. package/dist/pipeline/failure-modes.js +13 -1
  342. package/dist/pipeline/gap-analysis.d.ts +1 -1
  343. package/dist/pipeline/gap-analysis.js +3 -1
  344. package/dist/pipeline/generate-configs.d.ts +2 -2
  345. package/dist/pipeline/generate-configs.js +16 -9
  346. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  347. package/dist/pipeline/grader-compare-runner.js +7 -1
  348. package/dist/pipeline/grader-comparison.d.ts +1 -1
  349. package/dist/pipeline/grader-comparison.js +1 -1
  350. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  351. package/dist/pipeline/grader-consistency-runner.js +7 -1
  352. package/dist/pipeline/grader-consistency.d.ts +1 -1
  353. package/dist/pipeline/grader-consistency.js +1 -1
  354. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  355. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  356. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  357. package/dist/pipeline/grader-sensitivity.js +1 -1
  358. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  359. package/dist/pipeline/grader-validate-runner.js +2 -2
  360. package/dist/pipeline/grader-validation.d.ts +1 -1
  361. package/dist/pipeline/grader-validation.js +1 -1
  362. package/dist/pipeline/map-request-to-config.js +16 -2
  363. package/dist/pipeline/mirror-repo-tasks.d.ts +8 -8
  364. package/dist/pipeline/mirror-repo-tasks.js +10 -10
  365. package/dist/pipeline/plan-format.d.ts +1 -1
  366. package/dist/pipeline/plan-format.js +1 -1
  367. package/dist/pipeline/plan.d.ts +1 -1
  368. package/dist/pipeline/plan.js +68 -30
  369. package/dist/pipeline/probe.d.ts +1 -1
  370. package/dist/pipeline/probe.js +1 -1
  371. package/dist/pipeline/readiness-report.d.ts +2 -2
  372. package/dist/pipeline/readiness-report.js +2 -2
  373. package/dist/pipeline/release-classification.d.ts +1 -1
  374. package/dist/pipeline/release-classification.js +1 -1
  375. package/dist/pipeline/release-report.d.ts +1 -1
  376. package/dist/pipeline/release-report.js +1 -1
  377. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  378. package/dist/pipeline/repo-eval-comment.js +1 -1
  379. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  380. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  381. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  382. package/dist/pipeline/resolve-mappings.js +44 -44
  383. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  384. package/dist/pipeline/retrieval-metrics.js +28 -20
  385. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  386. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  387. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  388. package/dist/pipeline/steps/compare-step.js +90 -0
  389. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  390. package/dist/pipeline/steps/eval-step.js +347 -0
  391. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  392. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  393. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  394. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  395. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  396. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  397. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  398. package/dist/pipeline/steps/publish-report-step.js +243 -0
  399. package/dist/pipeline/steps/report-step.d.ts +13 -0
  400. package/dist/pipeline/steps/report-step.js +56 -0
  401. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  402. package/dist/pipeline/steps/update-scores-step.js +42 -0
  403. package/dist/pipeline/targeted-loo.d.ts +1 -1
  404. package/dist/pipeline/targeted-loo.js +1 -1
  405. package/dist/pipeline/thresholds.d.ts +1 -1
  406. package/dist/pipeline/thresholds.js +1 -1
  407. package/dist/pipeline/validate.js +13 -0
  408. package/dist/report-store.d.ts +17 -0
  409. package/dist/report-store.js +24 -0
  410. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  411. package/dist/scripts/agent-behavior-report.js +315 -0
  412. package/dist/scripts/baseline.d.ts +43 -0
  413. package/dist/scripts/baseline.js +267 -0
  414. package/dist/scripts/calculate-scores.d.ts +166 -0
  415. package/dist/scripts/calculate-scores.js +1296 -0
  416. package/dist/scripts/compare.d.ts +22 -0
  417. package/dist/scripts/compare.js +334 -0
  418. package/dist/scripts/coverage-audit.d.ts +44 -0
  419. package/dist/scripts/coverage-audit.js +209 -0
  420. package/dist/scripts/debug-eval.d.ts +19 -0
  421. package/dist/scripts/debug-eval.js +73 -0
  422. package/dist/scripts/discovery-report.d.ts +58 -0
  423. package/dist/scripts/discovery-report.js +250 -0
  424. package/dist/scripts/fetch-docs.d.ts +35 -0
  425. package/dist/scripts/fetch-docs.js +472 -0
  426. package/dist/scripts/generate-configs.d.ts +66 -0
  427. package/dist/scripts/generate-configs.js +459 -0
  428. package/dist/scripts/grader-api.d.ts +27 -0
  429. package/dist/scripts/grader-api.js +206 -0
  430. package/dist/scripts/grader-compare.d.ts +22 -0
  431. package/dist/scripts/grader-compare.js +368 -0
  432. package/dist/scripts/grader-consistency.d.ts +20 -0
  433. package/dist/scripts/grader-consistency.js +313 -0
  434. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  435. package/dist/scripts/grader-sensitivity.js +354 -0
  436. package/dist/scripts/grader-validate.d.ts +19 -0
  437. package/dist/scripts/grader-validate.js +267 -0
  438. package/dist/scripts/measure-retrieval.d.ts +10 -0
  439. package/dist/scripts/measure-retrieval.js +145 -0
  440. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  441. package/dist/scripts/migrate-task-mode.js +1 -1
  442. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  443. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  444. package/dist/scripts/pipeline.d.ts +76 -0
  445. package/dist/scripts/pipeline.js +1031 -0
  446. package/dist/scripts/pr-comment.d.ts +10 -0
  447. package/dist/scripts/pr-comment.js +510 -0
  448. package/dist/scripts/readiness-report.d.ts +88 -0
  449. package/dist/scripts/readiness-report.js +342 -0
  450. package/dist/scripts/update-quality-scores.d.ts +15 -0
  451. package/dist/scripts/update-quality-scores.js +184 -0
  452. package/dist/scripts/validate-task-sources.d.ts +1 -1
  453. package/dist/scripts/validate-task-sources.js +1 -1
  454. package/dist/scripts/validate.d.ts +13 -0
  455. package/dist/scripts/validate.js +79 -0
  456. package/dist/scripts/webhook-server.d.ts +26 -0
  457. package/dist/scripts/webhook-server.js +147 -0
  458. package/dist/scripts/weekly-digest.d.ts +24 -0
  459. package/dist/scripts/weekly-digest.js +144 -0
  460. package/dist/sinks/format-slack.d.ts +64 -0
  461. package/dist/sinks/format-slack.js +306 -0
  462. package/dist/sinks/slack-sink.d.ts +27 -0
  463. package/dist/sinks/slack-sink.js +78 -0
  464. package/dist/sinks/types.d.ts +1 -1
  465. package/dist/sinks/types.js +1 -1
  466. package/dist/sinks/webhook-sink.d.ts +19 -0
  467. package/dist/sinks/webhook-sink.js +50 -0
  468. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  469. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  470. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  471. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  472. package/dist/tasks/literacy/functions.task.ts +70 -0
  473. package/dist/tasks/literacy/groq.task.ts +259 -0
  474. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  475. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  476. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  477. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  478. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  479. package/package.json +32 -24
  480. package/tasks/.expanded.agentic.yaml +280 -0
  481. package/tasks/.expanded.yaml +565 -0
  482. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  483. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  484. package/tasks/literacy/content-lake.task.ts +181 -0
  485. package/tasks/literacy/frameworks.task.ts +1 -0
  486. package/tasks/literacy/functions.task.ts +1 -0
  487. package/tasks/literacy/groq.task.ts +1 -0
  488. package/tasks/literacy/image-handling.task.ts +95 -0
  489. package/tasks/literacy/nextjs-live.task.ts +2 -1
  490. package/tasks/literacy/portable-text.task.ts +169 -0
  491. package/tasks/literacy/studio-setup.task.ts +5 -2
  492. package/tasks/literacy/visual-editing.task.ts +1 -0
  493. package/LICENSE +0 -21
  494. package/tasks/frameworks.yaml +0 -98
  495. package/tasks/functions.yaml +0 -51
  496. package/tasks/groq.yaml +0 -216
  497. package/tasks/nextjs-live.yaml +0 -62
  498. package/tasks/studio-setup.yaml +0 -111
  499. package/tasks/visual-editing.yaml +0 -120
@@ -1,51 +0,0 @@
1
- # tasks/functions.yaml
2
- #
3
- # Sanity Functions & Automations — webhooks, event-driven functions.
4
- #
5
- # Each task is defined once. The pipeline auto-generates gold (with docs)
6
- # and baseline (without docs) variants from each definition.
7
-
8
- # ============================================================
9
- # TASK: Document Publish Webhook Function
10
- # ============================================================
11
- - id: functions-webhook
12
- description: "Functions - Webhook on document publish"
13
- doc_coverage: true
14
- canonical_docs:
15
- - slug: functions-introduction
16
- reason: "Functions overview — deployment, triggers, lifecycle"
17
- - slug: webhooks
18
- reason: "GROQ-powered webhooks — configuration and filtering"
19
- - slug: function-wrapper
20
- reason: "Function handler reference — event shape, exports"
21
- - slug: functions-cheatsheet
22
- reason: "Quick reference for common function patterns"
23
- reference_solution: reference-solutions/functions/publish-webhook.ts
24
- vars:
25
- task: |
26
- Deploy a Sanity function that triggers when a document is published
27
- and sends a webhook notification to an external endpoint.
28
-
29
- Requirements:
30
- 1. The function should trigger on document publish events
31
- 2. Filter to only fire for "post" document types
32
- 3. Send a POST request to an external URL with the document data
33
- 4. Handle errors gracefully
34
-
35
- Provide a complete implementation including any configuration.
36
- docs: file://contexts/canonical/functions-webhook.md
37
- assert:
38
- - type: llm-rubric
39
- template: task-completion
40
- criteria:
41
- - Event-driven function definition
42
- - Document type filtering (post type only)
43
- - HTTP webhook call with document payload
44
- - Basic error handling
45
-
46
- - type: llm-rubric
47
- template: code-correctness
48
- criteria:
49
- - No deprecated API patterns
50
- - Proper error handling
51
- - Idiomatic function handler structure
package/tasks/groq.yaml DELETED
@@ -1,216 +0,0 @@
1
- # tasks/groq.yaml
2
- #
3
- # GROQ Query Language — filters, projections, joins, ordering, pagination.
4
- #
5
- # Each task is defined once. The pipeline auto-generates gold (with docs)
6
- # and baseline (without docs) variants from each definition.
7
-
8
- # ============================================================
9
- # TASK: Blog queries with filtering and pagination
10
- # ============================================================
11
- - id: groq-blog-queries
12
- description: "GROQ - Blog queries with filtering and pagination"
13
- doc_coverage: true
14
- canonical_docs:
15
- - slug: groq-introduction
16
- reason: "GROQ overview — syntax, filters, projections, core concepts"
17
- - slug: how-queries-work
18
- reason: "Detailed walkthrough of GROQ query mechanics"
19
- - slug: query-cheat-sheet
20
- reason: "Ready-made query patterns for common use cases"
21
- - slug: paginating-with-groq
22
- reason: "Pagination patterns with slice syntax"
23
- reference_solution: reference-solutions/groq/blog-queries.ts
24
- vars:
25
- task: |
26
- Write GROQ queries for a Sanity blog application:
27
-
28
- 1. Fetch all published blog posts ordered by publishedAt descending,
29
- with a projection that includes: _id, title, slug (from slug.current),
30
- publishedAt, excerpt, and the author's name (resolved from a reference)
31
- 2. Add pagination to return only the first 10 results
32
- 3. Fetch a single post by its slug parameter, including the full body
33
- content and resolved author and category references
34
- 4. Fetch posts published after a specific date
35
- 5. Fetch posts that belong to a specific category (where categories
36
- is an array of references)
37
-
38
- Use @sanity/client with client.fetch() for all queries. Include
39
- TypeScript types for the query results.
40
- docs: file://contexts/canonical/groq-blog-queries.md
41
- assert:
42
- - type: llm-rubric
43
- template: task-completion
44
- criteria:
45
- - GROQ filter with _type == "post"
46
- - 'Projection with aliased slug field ("slug": slug.current)'
47
- - Reference resolution with -> for author
48
- - Ordering with | order(publishedAt desc)
49
- - Slice/pagination syntax [0...10] or [0..9]
50
- - Parameterized query with $slug for single post fetch
51
- - Date filtering with dateTime() or string comparison
52
- - Category filtering using references or array contains
53
-
54
- - type: llm-rubric
55
- template: code-correctness
56
- criteria:
57
- - Valid GROQ syntax (proper filter brackets, projection braces)
58
- - Uses @sanity/client createClient + client.fetch()
59
- - Correct parameter passing syntax ($param)
60
- - Proper reference dereference with ->
61
- - No deprecated patterns
62
-
63
- - type: contains-any
64
- value:
65
- - "client.fetch"
66
- - "createClient"
67
- weight: 1
68
-
69
- - type: contains-any
70
- value:
71
- - "order(publishedAt"
72
- - "order(_createdAt"
73
- - "| order("
74
- weight: 1
75
-
76
- - type: contains-any
77
- value:
78
- - "[0...10]"
79
- - "[0..9]"
80
- - "[0..."
81
- weight: 1
82
-
83
- # ============================================================
84
- # TASK: Joins and reference resolution
85
- # ============================================================
86
- - id: groq-joins-references
87
- description: "GROQ - Joins and reference resolution"
88
- doc_coverage: true
89
- canonical_docs:
90
- - slug: groq-joins
91
- reason: "Join patterns — ->, []->, subqueries, parent scope (^)"
92
- - slug: how-queries-work
93
- reason: "Core query mechanics including reference traversal"
94
- - slug: groq-introduction
95
- reason: "Overview including reference resolution basics"
96
- reference_solution: reference-solutions/groq/joins-references.ts
97
- vars:
98
- task: |
99
- Write GROQ queries that demonstrate join patterns in Sanity:
100
-
101
- 1. Follow a single reference to resolve an author's full profile
102
- from a post (post.author -> author document with name, bio, image)
103
- 2. Resolve an array of category references from a post
104
- (post.categories[]-> with title and slug)
105
- 3. Write a reverse reference query: given an author's ID, find all
106
- posts by that author using a subquery and the parent scope operator (^)
107
- 4. Create a nested join: for each author, include their 5 most recent
108
- posts as a nested array
109
- 5. Use the references() function to find all documents that reference
110
- a specific document ID
111
-
112
- Use @sanity/client with client.fetch(). Include TypeScript types.
113
- docs: file://contexts/canonical/groq-joins-references.md
114
- assert:
115
- - type: llm-rubric
116
- template: task-completion
117
- criteria:
118
- - Single reference follow with -> operator
119
- - Array reference resolution with []->
120
- - Reverse reference / subquery using *[references(^._id)]
121
- - Nested join pattern with parent scope (^)
122
- - The references() function
123
-
124
- - type: llm-rubric
125
- template: code-correctness
126
- criteria:
127
- - Correct -> dereference syntax
128
- - Valid []-> array dereference
129
- - Proper use of ^ parent scope operator
130
- - Valid references() function usage
131
- - No made-up syntax
132
-
133
- - type: contains
134
- value: "->"
135
- weight: 1
136
-
137
- - type: contains-any
138
- value:
139
- - "references("
140
- - "references(^"
141
- weight: 1
142
-
143
- # ============================================================
144
- # TASK: Advanced filtering and projections
145
- # ============================================================
146
- - id: groq-advanced-filtering
147
- description: "GROQ - Advanced filtering and projections"
148
- doc_coverage: true
149
- canonical_docs:
150
- - slug: groq-functions
151
- reason:
152
- "GROQ function reference — select(), coalesce(), count(), defined()"
153
- - slug: query-cheat-sheet
154
- reason: "Common filtering patterns and match operator examples"
155
- - slug: groq-operators
156
- reason: "Operator reference — match, comparison, logical operators"
157
- - slug: how-queries-work
158
- reason: "Query mechanics including projections and ordering"
159
- reference_solution: reference-solutions/groq/advanced-filtering.ts
160
- vars:
161
- task: |
162
- Write GROQ queries demonstrating advanced filtering and projection
163
- patterns:
164
-
165
- 1. Use select() for conditional projections — return different fields
166
- based on the document's _type (e.g., posts get excerpt, events get
167
- date and venue)
168
- 2. Use coalesce() for fallback values — e.g., use seoTitle if it
169
- exists, otherwise fall back to title
170
- 3. Use the match operator for full-text search in titles
171
- 4. Use count() to count documents matching a filter and to count
172
- items within an array field
173
- 5. Use defined() to filter for documents that have a specific field set
174
- 6. Filter items within an array using [condition] syntax
175
- 7. Order results by multiple fields (e.g., featured status first,
176
- then by publishedAt)
177
-
178
- Use @sanity/client with client.fetch(). Include TypeScript types.
179
- docs: file://contexts/canonical/groq-advanced-filtering.md
180
- assert:
181
- - type: llm-rubric
182
- template: task-completion
183
- criteria:
184
- - select() for conditional projections
185
- - coalesce() for fallback values
186
- - match operator for text search
187
- - count() function usage
188
- - defined() function for existence checks
189
- - Array filtering with [condition]
190
- - Multi-field ordering
191
-
192
- - type: llm-rubric
193
- template: code-correctness
194
- criteria:
195
- - Valid select() syntax with => arrow notation
196
- - Correct coalesce() usage
197
- - Proper match operator usage (on text fields)
198
- - Valid count() and defined() function calls
199
- - Correct array filter syntax
200
-
201
- - type: contains-any
202
- value:
203
- - "select("
204
- - "coalesce("
205
- weight: 1
206
-
207
- - type: contains-any
208
- value:
209
- - "count("
210
- - "defined("
211
- weight: 1
212
-
213
- - type: contains-any
214
- value:
215
- - "match"
216
- weight: 1
@@ -1,62 +0,0 @@
1
- # tasks/nextjs-live.yaml
2
- #
3
- # Next.js Integration + Live Content API tasks.
4
- #
5
- # Each task is defined once. The pipeline auto-generates gold (with docs)
6
- # and baseline (without docs) variants from each definition.
7
-
8
- # ============================================================
9
- # TASK: Next.js App Router Integration
10
- # ============================================================
11
- - id: nextjs-app-router-integration
12
- description: "Next.js - App Router integration with TypeScript"
13
- doc_coverage: true
14
- canonical_docs:
15
- - slug: next-js-app-router-quickstart
16
- reason: "Primary Next.js App Router quickstart guide"
17
- - slug: diplaying-content-in-next-js
18
- reason: "Fetching and displaying Sanity content in Next.js"
19
- - slug: sanity-typegen
20
- reason: "TypeGen — generating TypeScript types from GROQ queries"
21
- - slug: groq-introduction
22
- reason: "GROQ query language introduction"
23
- reference_solution: reference-solutions/nextjs/app-router-integration.tsx
24
- vars:
25
- task: |
26
- Integrate Sanity into a Next.js 16 App Router application
27
- with full TypeScript support:
28
-
29
- 1. Set up the Sanity client with proper configuration
30
- 2. Create a typed GROQ query for fetching blog posts
31
- 3. Build a server component that fetches and renders posts
32
- 4. Generate TypeScript types using sanity typegen
33
-
34
- Provide all files needed for a working integration.
35
- docs: file://contexts/canonical/nextjs-app-router-integration.md
36
- assert:
37
- - type: llm-rubric
38
- template: task-completion
39
- criteria:
40
- - Sanity client configuration (createClient)
41
- - GROQ query with proper syntax
42
- - Next.js App Router server component using async/await
43
- - TypeScript type definitions or typegen setup
44
-
45
- - type: llm-rubric
46
- template: code-correctness
47
- criteria:
48
- - "Uses @sanity/client (not deprecated packages)"
49
- - App Router patterns (not Pages Router)
50
- - Proper server component data fetching (no useEffect)
51
- - Valid GROQ query syntax
52
-
53
- - type: contains
54
- value: "createClient"
55
- weight: 1
56
-
57
- - type: contains-any
58
- value:
59
- - "groq"
60
- - "GROQ"
61
- - "*["
62
- weight: 1
@@ -1,111 +0,0 @@
1
- # tasks/studio-setup.yaml
2
- #
3
- # Studio Setup & Customization — schema, config, and tooling tasks.
4
- #
5
- # Each task is defined once. The pipeline auto-generates gold (with docs)
6
- # and baseline (without docs) variants from each definition.
7
-
8
- # ============================================================
9
- # TASK: Blog Schema Setup
10
- # ============================================================
11
- - id: studio-blog-schema
12
- description: "Studio Setup - Blog schema with posts, authors, categories"
13
- doc_coverage: true
14
- canonical_docs:
15
- - slug: schemas-and-forms
16
- reason: "High-level overview of schemas and the form builder"
17
- - slug: introduction-to-schemas
18
- reason: "Introduces schema concepts, defineType/defineField"
19
- - slug: config-api-reference
20
- reason: "Configuration API — defineConfig, plugins, schema registration"
21
- - slug: reference-type
22
- reason: "Reference field type for author/category relationships"
23
- reference_solution: reference-solutions/studio-setup/blog-schema.ts
24
- vars:
25
- task: |
26
- Set up a new Sanity Studio with a custom schema for a blog:
27
-
28
- 1. Create document types for: posts, authors, categories
29
- 2. Posts should have: title, slug, body (portable text), author reference, categories array
30
- 3. Authors should have: name, bio, image
31
- 4. Categories should have: title, description
32
-
33
- Include the schema definitions and sanity.config.ts setup.
34
- docs: file://contexts/canonical/studio-blog-schema.md
35
- assert:
36
- # Task Completion (0–100)
37
- - type: llm-rubric
38
- template: task-completion
39
- criteria:
40
- - Three document types (post, author, category)
41
- - Post with all required fields including portable text body
42
- - Reference from post to author
43
- - Array of references from post to categories
44
- - sanity.config.ts with schema registration
45
-
46
- # Code Correctness (0–100)
47
- - type: llm-rubric
48
- template: code-correctness
49
- criteria:
50
- - Uses defineConfig, defineType, defineField (v3 syntax)
51
- - Does NOT use createSchema (deprecated v2)
52
- - Proper reference syntax with 'to' array
53
- - Correct portable text array definition
54
-
55
- # Pattern checks (binary pass/fail)
56
- - type: contains
57
- value: "defineConfig"
58
- weight: 1
59
- - type: contains
60
- value: "defineType"
61
- weight: 1
62
- - type: contains
63
- value: "defineField"
64
- weight: 1
65
- - type: not-contains
66
- value: "createSchema"
67
- weight: 1
68
- - type: not-contains
69
- value: "import Schema from"
70
- weight: 1
71
-
72
- # ============================================================
73
- # TASK: Custom Studio Tool
74
- # ============================================================
75
- - id: studio-custom-tool
76
- description: "Studio Setup - Custom tool in sidebar"
77
- canonical_docs:
78
- - slug: studio-tools
79
- reason: "Overview of Studio tools system"
80
- - slug: tool-api-reference
81
- reason: "Tool API — name, title, icon, component properties"
82
- - slug: custom-studio-tool
83
- reason: "Step-by-step guide for creating a custom tool"
84
- reference_solution: reference-solutions/studio-setup/custom-tool.tsx
85
- vars:
86
- task: |
87
- Add a custom tool to the Sanity Studio sidebar that displays
88
- a dashboard. The tool should:
89
-
90
- 1. Appear in the studio navigation with a custom icon
91
- 2. Have a title and name
92
- 3. Render a React component showing a "Dashboard" heading
93
-
94
- Provide the tool definition and sanity.config.ts registration.
95
- docs: file://contexts/canonical/studio-custom-tool.md
96
- assert:
97
- - type: llm-rubric
98
- template: task-completion
99
- criteria:
100
- - Tool object with name, title, icon, component
101
- - React component for the tool UI
102
- - Registration in sanity.config.ts tools array
103
-
104
- - type: contains
105
- value: "tools"
106
- weight: 1
107
- - type: javascript
108
- value: |
109
- return output.includes('name:') &&
110
- output.includes('component') &&
111
- (output.includes('icon:') || output.includes('Icon'))
@@ -1,120 +0,0 @@
1
- # tasks/visual-editing.yaml
2
- #
3
- # Visual Editing — Presentation tool, click-to-edit, live preview.
4
- # This is typically the hardest area for AI tools to get right.
5
- #
6
- # Each task is defined once. The pipeline auto-generates gold (with docs)
7
- # and baseline (without docs) variants from each definition.
8
-
9
- # ============================================================
10
- # TASK: Presentation Tool with Click-to-Edit
11
- # ============================================================
12
- - id: visual-editing-presentation
13
- description: "Visual Editing - Presentation tool with click-to-edit"
14
- doc_coverage: true
15
- canonical_docs:
16
- - slug: configuring-the-presentation-tool
17
- reason: "Core presentationTool configuration and setup"
18
- - slug: introduction-to-visual-editing
19
- reason: "Visual Editing concepts — stega, overlays, data attributes"
20
- - slug: visual-editing-with-next-js-app-router
21
- reason: "Next.js App Router-specific visual editing guide"
22
- - slug: stega
23
- reason: "Stega encoding for click-to-edit data attributes"
24
- reference_solution: reference-solutions/visual-editing/presentation-nextjs.tsx
25
- vars:
26
- task: |
27
- Set up the Presentation tool with a Next.js 14 (App Router) frontend
28
- and implement click-to-edit functionality:
29
-
30
- 1. Configure the Presentation tool in sanity.config.ts
31
- 2. Set up the Next.js app to work with Visual Editing
32
- 3. Implement data attributes so clicking content in the preview
33
- opens the corresponding field in Studio
34
-
35
- Provide all necessary code for both Studio and Next.js sides.
36
- docs: file://contexts/canonical/visual-editing-presentation.md
37
- assert:
38
- # Task Completion (0–100)
39
- - type: llm-rubric
40
- template: task-completion
41
- criteria:
42
- - presentationTool configured in sanity.config.ts
43
- - previewUrl or equivalent configured
44
- - Data attributes for click-to-edit (createDataAttribute or stega)
45
- - Next.js App Router patterns used correctly
46
-
47
- # Code Correctness (0–100)
48
- - type: llm-rubric
49
- template: code-correctness
50
- criteria:
51
- - "Uses @sanity/presentation (not deprecated packages)"
52
- - Uses createDataAttribute or stega encoding correctly
53
- - Proper Next.js App Router patterns (not Pages Router)
54
- - No mixing of deprecated and current APIs
55
-
56
- # Pattern checks (binary pass/fail)
57
- - type: contains
58
- value: "presentationTool"
59
- weight: 1
60
-
61
- - type: contains-any
62
- value:
63
- - "createDataAttribute"
64
- - "data-sanity"
65
- - "encodeDataAttribute"
66
- - "stega"
67
- weight: 1
68
-
69
- - type: not-contains
70
- value: "@sanity/preview-kit"
71
- weight: 1
72
-
73
- # ============================================================
74
- # TASK: Live Preview with Draft Content
75
- # ============================================================
76
- - id: visual-editing-live-preview
77
- description: "Visual Editing - Live preview with draft content"
78
- canonical_docs:
79
- - slug: live-content-api
80
- reason: "Live Content API — defineLive, real-time subscriptions"
81
- - slug: perspectives
82
- reason: "Draft vs published perspectives"
83
- - slug: fetching-content-for-visual-editing
84
- reason: "Data fetching patterns for visual editing contexts"
85
- reference_solution: reference-solutions/visual-editing/live-preview.tsx
86
- vars:
87
- task: |
88
- Implement live preview in a Next.js app that shows draft content
89
- from Sanity in real-time as editors make changes in the Studio.
90
-
91
- Requirements:
92
- - Use the Live Content API approach
93
- - Handle draft vs published perspectives correctly
94
- - Show real-time updates without page refresh
95
-
96
- Provide a complete implementation.
97
- docs: file://contexts/canonical/visual-editing-live-preview.md
98
- assert:
99
- - type: llm-rubric
100
- template: task-completion
101
- criteria:
102
- - Live Content API usage (defineLive, useLiveQuery, or sanityFetch with
103
- live option)
104
- - Draft perspective configuration
105
- - Real-time subscription/update mechanism
106
-
107
- - type: llm-rubric
108
- template: code-correctness
109
- criteria:
110
- - Modern API usage (not deprecated preview packages)
111
- - Proper perspective handling
112
- - Correct subscription lifecycle management
113
-
114
- - type: contains-any
115
- value:
116
- - "useLiveQuery"
117
- - "defineLive"
118
- - "live:"
119
- - "perspective"
120
- weight: 1