@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530) hide show
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,111 @@
1
+ /**
2
+ * measure-retrieval.ts
3
+ *
4
+ * Pure retrieval quality measurement functions.
5
+ *
6
+ * Evaluates retrieval quality by comparing what Sanity's text search
7
+ * returns against the manually-annotated canonical documents for each
8
+ * evaluation task. Produces Recall@K and NDCG@K metrics.
9
+ *
10
+ * This answers: "Can a retriever find the docs an LLM actually needs?"
11
+ *
12
+ * Migrated from lib/measure-retrieval.ts — no process.argv/process.env,
13
+ * accepts rootDir and retriever function as parameters.
14
+ */
15
+ import { resolveMappings } from "./resolve-mappings.js";
16
+ // ---------------------------------------------------------------------------
17
+ // Metrics (pure, exported for testing)
18
+ // ---------------------------------------------------------------------------
19
/**
 * Recall@K: the fraction of distinct canonical documents that appear among
 * the first `k` retrieved results.
 *
 * @param canonical - Identifiers of the documents that should be found.
 * @param retrieved - Retrieved identifiers, best match first.
 * @param k - Cut-off rank. Negative values are treated as 0.
 * @returns A value in [0, 1]; 0 when `canonical` is empty.
 */
export function calculateRecall(canonical, retrieved, k) {
    // De-duplicate so a canonical doc listed twice is not counted twice.
    const wanted = new Set(canonical);
    if (wanted.size === 0)
        return 0;
    // A negative `end` would make slice() count from the tail of the array,
    // silently selecting "all but the last |k|" instead of nothing.
    const cutoff = k < 0 ? 0 : k;
    const topK = new Set(retrieved.slice(0, cutoff));
    let hits = 0;
    for (const doc of wanted) {
        if (topK.has(doc))
            hits++;
    }
    return hits / wanted.size;
}
24
/**
 * NDCG@K with binary relevance: a retrieved document is relevant (gain 1)
 * when it is one of the canonical documents, otherwise its gain is 0.
 *
 * Each canonical document is credited at most once, at its best (earliest)
 * rank, and the ideal ranking is built from the distinct canonical documents,
 * so the result always lies in [0, 1].
 *
 * @param canonical - Identifiers of the documents that should be found.
 * @param retrieved - Retrieved identifiers, best match first.
 * @param k - Cut-off rank.
 * @returns DCG / ideal DCG, or 0 when the ideal DCG is 0.
 */
export function calculateNDCG(canonical, retrieved, k) {
    const relevant = new Set(canonical);
    const credited = new Set();
    // Discounted cumulative gain of the actual ranking.
    let dcg = 0;
    const depth = Math.min(k, retrieved.length);
    for (let rank = 0; rank < depth; rank++) {
        const doc = retrieved[rank];
        // Skip repeats: crediting the same doc twice would push NDCG above 1.
        if (relevant.has(doc) && !credited.has(doc)) {
            credited.add(doc);
            dcg += 1 / Math.log2(rank + 2); // +2 because log2(1) = 0
        }
    }
    // Ideal DCG: every distinct canonical doc ranked first, up to k.
    let idcg = 0;
    const idealDepth = Math.min(k, relevant.size);
    for (let rank = 0; rank < idealDepth; rank++) {
        idcg += 1 / Math.log2(rank + 2);
    }
    return idcg === 0 ? 0 : dcg / idcg;
}
40
/**
 * Measure retrieval quality for every task of every feature area.
 *
 * For each task the task description is used as the search query:
 * `retriever(description, 10)` is awaited (one task at a time) and its result
 * is scored against the task's canonical doc slugs with Recall@5, Recall@10
 * and NDCG@10. `onProgress(area, taskId, result)` is invoked after each task
 * when provided.
 *
 * @param options - `rootDir` (handed to `resolveMappings`), `retriever`, and
 *   an optional `onProgress` callback.
 * @returns `{ by_area, overall, results }`: per-area averages (areas without
 *   any task are omitted), overall averages, and the per-task results.
 */
export async function measureRetrieval(options) {
    const { rootDir, retriever, onProgress } = options;
    const featureAreas = resolveMappings(rootDir).feature_areas;
    // Sum of one metric over a list of per-task results.
    const total = (rows, metric) => rows.reduce((sum, row) => sum + row[metric], 0);
    const results = [];
    for (const [area, areaData] of Object.entries(featureAreas)) {
        for (const task of areaData.tasks) {
            const expected = task.canonical_docs.map((doc) => doc.slug);
            // The task description doubles as the search query.
            const found = await retriever(task.description, 10);
            const scored = {
                canonical_docs: expected,
                feature_area: area,
                ndcg_at_10: calculateNDCG(expected, found, 10),
                recall_at_5: calculateRecall(expected, found, 5),
                recall_at_10: calculateRecall(expected, found, 10),
                retrieved_docs: found,
                task_id: task.id,
            };
            results.push(scored);
            onProgress?.(area, task.id, scored);
        }
    }
    // Per-area averages; areas that produced no results are left out.
    const byArea = {};
    for (const area of Object.keys(featureAreas)) {
        const rows = results.filter((row) => row.feature_area === area);
        if (rows.length > 0) {
            byArea[area] = {
                avg_ndcg_at_10: total(rows, "ndcg_at_10") / rows.length,
                avg_recall_at_5: total(rows, "recall_at_5") / rows.length,
                avg_recall_at_10: total(rows, "recall_at_10") / rows.length,
                task_count: rows.length,
            };
        }
    }
    // Overall averages; dividing by 1 keeps an empty run at 0 instead of NaN.
    const divisor = results.length || 1;
    const overall = {
        avg_ndcg_at_10: total(results, "ndcg_at_10") / divisor,
        avg_recall_at_5: total(results, "recall_at_5") / divisor,
        avg_recall_at_10: total(results, "recall_at_10") / divisor,
    };
    return { by_area: byArea, overall, results };
}
89
/**
 * Render a retrieval summary as text for console display.
 *
 * The output is a banner, a pipe-delimited table with one row per entry of
 * `summary.by_area` (percentages with one decimal), a blank line, and a
 * single "Overall" line built from `summary.overall`.
 *
 * @param summary - Object with `by_area` and `overall` averages.
 * @returns The lines joined with "\n".
 */
export function formatRetrievalTable(summary) {
    // Ratio in [0, 1] -> percentage text with one decimal, e.g. "62.5".
    const pct = (ratio) => (ratio * 100).toFixed(1);
    const banner = "=".repeat(70);
    const rows = Object.entries(summary.by_area).map(([area, stats]) => {
        const cells = [
            area.padEnd(19),
            `${pct(stats.avg_recall_at_5).padStart(7)}%`,
            `${pct(stats.avg_recall_at_10).padStart(8)}%`,
            `${pct(stats.avg_ndcg_at_10).padStart(6)}%`,
            stats.task_count.toString().padStart(5),
        ];
        return `| ${cells.join(" | ")} |`;
    });
    const { overall } = summary;
    const overallLine = [
        `Overall: Recall@5=${pct(overall.avg_recall_at_5)}%`,
        `Recall@10=${pct(overall.avg_recall_at_10)}%`,
        `NDCG@10=${pct(overall.avg_ndcg_at_10)}%`,
    ].join(" ");
    return [
        banner,
        "RETRIEVAL QUALITY SUMMARY",
        banner,
        "",
        "| Feature Area | Recall@5 | Recall@10 | NDCG@10 | Tasks |",
        "|---------------------|----------|-----------|---------|-------|",
        ...rows,
        "",
        overallLine,
    ].join("\n");
}
@@ -0,0 +1,86 @@
1
+ /**
2
+ * pipeline/mirror-repo-tasks.ts
3
+ *
4
+ * Mirrors repo-based TaskDefinitions into the Sanity Content Lake as
5
+ * ailf.task documents with origin provenance. This makes the Content Lake
6
+ * the universal registry — every task (native or repo-sourced) is visible
7
+ * in Studio.
8
+ *
9
+ * The mirror is idempotent: deterministic document IDs + content hashing
10
+ * means unchanged tasks are skipped. Changed tasks are upserted via
11
+ * createOrReplace.
12
+ *
13
+ * @see docs/exec-plans/completed/tasks-as-content/phase-5-content-lake-mirroring.md
14
+ */
15
+ import type { SanityClient } from "@sanity/client";
16
+ import { type TaskDefinition } from "../_vendor/ailf-core/index.d.ts";
17
/** Inputs accepted by `mirrorRepoTasks`. */
export interface MirrorOptions {
    /** Sanity client with write access (used for queries and createOrReplace) */
    client: SanityClient;
    /** Tasks to mirror (already loaded from repo) */
    tasks: TaskDefinition[];
    /** Git context for origin provenance; owner and name also feed the mirror document ID */
    git: GitContext;
    /** If true, log what would be done without writing. Defaults to false. */
    dryRun?: boolean;
}
27
/**
 * Repository provenance recorded in each mirror document's origin block.
 *
 * When produced by `detectGitContext`, any field that could not be
 * determined is set to the literal string "unknown".
 */
export interface GitContext {
    /** Full repo identifier (e.g., "sanity-io/visual-editing") */
    repo: string;
    /** Repo owner (e.g., "sanity-io") */
    owner: string;
    /** Repo name (e.g., "visual-editing") */
    name: string;
    /** Current branch */
    branch: string;
    /** HEAD commit SHA */
    commitSha: string;
}
39
/** Outcome summary returned by `mirrorRepoTasks`. */
export interface MirrorResult {
    /** Total tasks processed (the length of the input task list) */
    total: number;
    /** Tasks created or updated; in a dry run, tasks that would have been written */
    upserted: number;
    /** Tasks skipped (stored content hash matches the current one) */
    skipped: number;
    /** Feature areas auto-created; in a dry run, areas that would have been created */
    areasCreated: string[];
    /** Canonical doc slugs that failed to resolve */
    unresolvedSlugs: string[];
    /** Per-task error messages (non-fatal — mirror continues with the next task) */
    errors: string[];
}
53
/**
 * Mirror repo tasks to the Content Lake.
 *
 * For each task:
 * 1. Compute deterministic document ID
 * 2. Compute content hash of the task definition
 * 3. Check if mirror document exists with same hash → skip if unchanged
 * 4. Resolve canonical doc slugs → Sanity references
 * 5. Auto-create feature areas if needed
 * 6. Upsert the ailf.task document with origin block
 *
 * A failure while mirroring a single task is recorded in `errors` and does
 * not stop the remaining tasks. An empty task list returns immediately.
 *
 * @param options - Client, tasks, git context, and optional dry-run flag.
 * @returns Counts of upserted/skipped tasks plus created areas, unresolved
 *   slugs, and collected error messages.
 */
export declare function mirrorRepoTasks(options: MirrorOptions): Promise<MirrorResult>;
65
/**
 * Detect git context from GitHub Actions environment variables,
 * falling back to git CLI commands.
 *
 * @param repoTasksPath - Working directory in which the git CLI fallback runs.
 * @returns The detected context; fields that cannot be determined are set to
 *   "unknown" (all of them when the git commands fail).
 */
export declare function detectGitContext(repoTasksPath: string): Promise<GitContext>;
70
/**
 * Deterministic mirror document ID.
 *
 * Format: ailf.task.mirror.<owner>.<repo>.<taskId>
 *
 * In each of owner/repo/taskId, every character other than a-z, 0-9 and
 * hyphen (matched case-insensitively) is replaced with a hyphen, and the
 * segment is then lowercased. Distinct inputs can therefore map to the same
 * ID (e.g. "a.b" and "a-b").
 */
export declare function mirrorDocId(owner: string, repo: string, taskId: string): string;
79
/**
 * Compute a content hash of a TaskDefinition for change detection.
 *
 * Includes all fields that affect the mirror document. Excludes
 * runtime metadata like referenceSolution (filesystem path) since
 * that's not mirrored.
 *
 * @param task - The task definition to fingerprint.
 * @returns The first 16 hex characters of a SHA-256 digest.
 */
export declare function computeTaskHash(task: TaskDefinition): string;
@@ -0,0 +1,350 @@
1
+ /**
2
+ * pipeline/mirror-repo-tasks.ts
3
+ *
4
+ * Mirrors repo-based TaskDefinitions into the Sanity Content Lake as
5
+ * ailf.task documents with origin provenance. This makes the Content Lake
6
+ * the universal registry — every task (native or repo-sourced) is visible
7
+ * in Studio.
8
+ *
9
+ * The mirror is idempotent: deterministic document IDs + content hashing
10
+ * means unchanged tasks are skipped. Changed tasks are upserted via
11
+ * createOrReplace.
12
+ *
13
+ * @see docs/exec-plans/completed/tasks-as-content/phase-5-content-lake-mirroring.md
14
+ */
15
+ import { createHash } from "crypto";
16
+ import { isSlugRef, } from "../_vendor/ailf-core/index.js";
17
+ // ---------------------------------------------------------------------------
18
+ // Public API
19
+ // ---------------------------------------------------------------------------
20
/**
 * Mirror repo-sourced tasks into the Content Lake as ailf.task documents.
 *
 * Up front, in one batch each: canonical doc slugs are resolved to document
 * IDs, missing feature areas are created, and the stored content hashes of
 * the existing mirror documents are fetched. Then each task is processed in
 * order: a task whose current hash equals the stored one is skipped, any
 * other task is built into a mirror document and written with
 * createOrReplace (or only logged when `dryRun` is set).
 *
 * A failure while processing one task is appended to `errors`; the loop
 * carries on with the next task.
 *
 * @param options - `client`, `tasks`, `git`, and optional `dryRun` (default false).
 * @returns The mirror summary (total, upserted, skipped, areasCreated,
 *   unresolvedSlugs, errors).
 */
export async function mirrorRepoTasks(options) {
    const { client, tasks, git, dryRun = false } = options;
    const summary = {
        total: tasks.length,
        upserted: 0,
        skipped: 0,
        areasCreated: [],
        unresolvedSlugs: [],
        errors: [],
    };
    if (tasks.length === 0)
        return summary;
    // Collect every distinct slug reference; other ref kinds are stored
    // without a resolved article reference for now.
    const slugSet = new Set();
    for (const task of tasks) {
        for (const ref of task.canonicalDocs) {
            if (isSlugRef(ref))
                slugSet.add(ref.slug);
        }
    }
    const allSlugs = [...slugSet];
    const slugToDocId = await batchResolveDocSlugs(client, allSlugs);
    // Report slugs that matched no article.
    summary.unresolvedSlugs.push(...allSlugs.filter((slug) => !slugToDocId.has(slug)));
    // Make sure every referenced feature area has a document.
    const areaIds = [...new Set(tasks.map((task) => task.featureArea))];
    summary.areasCreated = await ensureFeatureAreas(client, areaIds, dryRun);
    // Stored hashes of the mirror documents, keyed by document ID.
    const mirrorIds = tasks.map((task) => mirrorDocId(git.owner, git.name, task.id));
    const existingHashes = await fetchExistingHashes(client, mirrorIds);
    for (const [index, task] of tasks.entries()) {
        try {
            const docId = mirrorIds[index];
            const contentHash = computeTaskHash(task);
            if (existingHashes.get(docId) === contentHash) {
                // Unchanged since the last sync.
                summary.skipped++;
            }
            else {
                const doc = buildMirrorDocument(task, {
                    contentHash,
                    docId,
                    git,
                    slugToDocId,
                });
                if (dryRun) {
                    console.log(` [dry-run] Would upsert: ${docId}`);
                }
                else {
                    await client.createOrReplace(doc);
                }
                summary.upserted++;
            }
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            summary.errors.push(`Failed to mirror "${task.id}": ${reason}`);
        }
    }
    return summary;
}
93
+ // ---------------------------------------------------------------------------
94
+ // Detect git context from environment or CLI
95
+ // ---------------------------------------------------------------------------
96
/**
 * Detect git context from GitHub Actions environment variables,
 * falling back to git CLI commands.
 *
 * When GITHUB_REPOSITORY is set, owner/name come from splitting it on "/",
 * the branch is GITHUB_HEAD_REF or, failing that, GITHUB_REF with its leading
 * "refs/heads/" or "refs/tags/" prefix removed, and the commit is GITHUB_SHA.
 *
 * Otherwise `git remote get-url origin`, `git rev-parse --abbrev-ref HEAD`
 * and `git rev-parse HEAD` are run in `repoTasksPath`, and owner/name are
 * parsed from the remote URL (https or scp-style, optional ".git" suffix).
 * Repository names that contain dots (e.g. "next.js") are kept intact.
 *
 * @param repoTasksPath - Working directory for the git CLI fallback.
 * @returns The detected context. Missing pieces are reported as "unknown";
 *   if any git command fails, every field is "unknown".
 */
export async function detectGitContext(repoTasksPath) {
    // GitHub Actions provides these env vars
    const ghRepo = process.env.GITHUB_REPOSITORY; // "owner/name"
    const ghRef = process.env.GITHUB_REF ?? "";
    const ghSha = process.env.GITHUB_SHA ?? "";
    const ghHeadRef = process.env.GITHUB_HEAD_REF ?? "";
    if (ghRepo) {
        const [owner, name] = ghRepo.split("/");
        // Strip the ref namespace only where it is a prefix, never mid-string.
        const branch = ghHeadRef || ghRef.replace(/^refs\/(?:heads|tags)\//, "");
        return {
            repo: ghRepo,
            owner: owner ?? "unknown",
            name: name ?? "unknown",
            branch: branch || "unknown",
            commitSha: ghSha || "unknown",
        };
    }
    // Fallback: try git CLI
    const { execSync } = await import("child_process");
    try {
        const runGit = (command) => execSync(command, { encoding: "utf-8", cwd: repoTasksPath }).trim();
        const remote = runGit("git remote get-url origin");
        const branch = runGit("git rev-parse --abbrev-ref HEAD");
        const commitSha = runGit("git rev-parse HEAD");
        // Parse remote URL: https://github.com/owner/name.git or git@github.com:owner/name.git
        // The name group is lazy and only excludes "/", so dots inside the
        // repo name survive while a trailing ".git" is still dropped.
        const match = remote.match(/github\.com[:/]([^/]+)\/([^/]+?)(?:\.git)?(?:\/|$)/) ??
            // Generic host: the owner must not span the "host:" part of an
            // scp-style remote, hence ":" is excluded as well.
            remote.match(/([^/:]+)\/([^/]+?)(?:\.git)?\/?$/);
        const owner = match?.[1] ?? "unknown";
        const name = match?.[2] ?? "unknown";
        return {
            repo: `${owner}/${name}`,
            owner,
            name,
            branch,
            commitSha,
        };
    }
    catch {
        // Not a git checkout, no origin remote, or git is unavailable.
        return {
            repo: "unknown/unknown",
            owner: "unknown",
            name: "unknown",
            branch: "unknown",
            commitSha: "unknown",
        };
    }
}
155
+ // ---------------------------------------------------------------------------
156
+ // Document ID scheme
157
+ // ---------------------------------------------------------------------------
158
/**
 * Build the deterministic ID of a task's mirror document.
 *
 * Format: ailf.task.mirror.<owner>.<repo>.<taskId>
 *
 * In each segment, every character other than a-z, 0-9 and hyphen (matched
 * case-insensitively) is replaced with a hyphen, then the segment is
 * lowercased, so the only dots in the ID are the separators.
 */
export function mirrorDocId(owner, repo, taskId) {
    const segments = [owner, repo, taskId].map((part) => part.replace(/[^a-z0-9-]/gi, "-").toLowerCase());
    return ["ailf.task.mirror", ...segments].join(".");
}
170
+ // ---------------------------------------------------------------------------
171
+ // Content hashing
172
+ // ---------------------------------------------------------------------------
173
/**
 * Fingerprint a TaskDefinition for change detection.
 *
 * The listed fields are serialized as JSON in a fixed key order and hashed
 * with SHA-256; the first 16 hex characters are returned. Fields not listed
 * here (such as referenceSolution) do not influence the hash.
 */
export function computeTaskHash(task) {
    const { id, description, featureArea, taskPrompt, canonicalDocs, docCoverage, assertions, baseline, } = task;
    // Key order is part of the hash input — keep it stable.
    const serialized = JSON.stringify({
        id,
        description,
        featureArea,
        taskPrompt,
        canonicalDocs,
        docCoverage,
        assertions,
        baseline,
    });
    const hasher = createHash("sha256");
    hasher.update(serialized);
    return hasher.digest("hex").substring(0, 16);
}
193
+ // ---------------------------------------------------------------------------
194
+ // Batch slug resolution
195
+ // ---------------------------------------------------------------------------
196
/**
 * Look up the document IDs of all articles whose slug is in `slugs`, using a
 * single query.
 *
 * @returns A Map from slug to document ID. Slugs without a matching article
 *   are absent. No query is issued for an empty list.
 */
async function batchResolveDocSlugs(client, slugs) {
    if (slugs.length === 0)
        return new Map();
    const query = [
        `*[_type == "article" && slug.current in $slugs] {`,
        `_id,`,
        `"slug": slug.current`,
        `}`,
    ].join("\n");
    const rows = await client.fetch(query, { slugs });
    const idBySlug = new Map();
    Array.from(rows).forEach((row) => {
        idBySlug.set(row.slug, row._id);
    });
    return idBySlug;
}
215
+ // ---------------------------------------------------------------------------
216
+ // Feature area auto-creation
217
+ // ---------------------------------------------------------------------------
218
/**
 * Make sure an ailf.featureArea document exists for each area in `areas`.
 *
 * Areas are looked up by `areaId.current`. Missing ones are written in one
 * transaction with the ID `ailf.featureArea.<area>` and a description derived
 * from the area name (first letter upper-cased, hyphens turned into spaces).
 * With `dryRun` set, nothing is written and each missing area is only logged.
 *
 * @returns The areas that were created (or, in a dry run, would be created).
 */
async function ensureFeatureAreas(client, areas, dryRun) {
    if (areas.length === 0)
        return [];
    // Which of the requested areas are already present?
    const known = new Set(await client.fetch(`*[_type == "ailf.featureArea" && areaId.current in $areas].areaId.current`, { areas }));
    const missing = areas.filter((area) => !known.has(area));
    if (missing.length === 0)
        return [];
    if (dryRun) {
        missing.forEach((area) => {
            console.log(` [dry-run] Would create feature area: ${area}`);
        });
        return missing;
    }
    const tx = client.transaction();
    missing.forEach((area) => {
        const label = area.charAt(0).toUpperCase() + area.slice(1).replace(/-/g, " ");
        tx.createOrReplace({
            _id: `ailf.featureArea.${area}`,
            _type: "ailf.featureArea",
            areaId: { _type: "slug", current: area },
            description: label,
        });
    });
    await tx.commit();
    return missing;
}
250
+ // ---------------------------------------------------------------------------
251
+ // Fetch existing content hashes
252
+ // ---------------------------------------------------------------------------
253
/**
 * Read the stored content hash (origin.contentHash) of the given documents.
 *
 * @returns A Map from document ID to hash. Documents that do not exist or
 *   carry no (truthy) hash are absent. No query is issued for an empty list.
 */
async function fetchExistingHashes(client, docIds) {
    if (docIds.length === 0)
        return new Map();
    const rows = await client.fetch(`*[_id in $ids] { _id, "hash": origin.contentHash }`, { ids: docIds });
    const hashById = new Map();
    Array.from(rows).forEach((row) => {
        if (!row.hash)
            return;
        hashById.set(row._id, row.hash);
    });
    return hashById;
}
271
+ // ---------------------------------------------------------------------------
272
+ // Build mirror document
273
+ // ---------------------------------------------------------------------------
274
/**
 * Assemble the ailf.task mirror document for one task.
 *
 * @param task - The task definition being mirrored.
 * @param opts - `contentHash` and `docId` for this task, the `git` context,
 *   and `slugToDocId` (slug → article document ID).
 * @returns The document object to upsert. `origin.lastSyncedAt` is set to the
 *   current time.
 */
function buildMirrorDocument(task, opts) {
    const { contentHash, docId, git, slugToDocId } = opts;
    // Canonical docs: only slug refs can be turned into article references
    // today; every other ref kind (and any unresolved slug) keeps just its
    // reason.
    const canonicalDocs = task.canonicalDocs.map((ref, index) => {
        const targetId = isSlugRef(ref) ? slugToDocId.get(ref.slug) : undefined;
        const link = targetId
            ? { doc: { _ref: targetId, _type: "reference" } }
            : {};
        return { _key: `cd${index}`, ...link, reason: ref.reason ?? "" };
    });
    // Assertions: copy over only the fields each assertion actually carries.
    const assertArray = task.assertions.map((assertion, index) => {
        const isRubric = assertion.type === "llm-rubric" && "template" in assertion;
        const hasCriteria = isRubric && "criteria" in assertion && Array.isArray(assertion.criteria);
        const hasValue = "value" in assertion && assertion.value !== undefined;
        const hasThreshold = "threshold" in assertion && assertion.threshold !== undefined;
        return {
            _key: `a${index}`,
            type: assertion.type,
            ...(isRubric ? { template: assertion.template } : {}),
            ...(hasCriteria ? { criteria: assertion.criteria } : {}),
            // Non-string values are stored in their JSON form.
            ...(hasValue
                ? {
                    value: typeof assertion.value === "string"
                        ? assertion.value
                        : JSON.stringify(assertion.value),
                }
                : {}),
            ...(hasThreshold ? { threshold: assertion.threshold } : {}),
            ...(assertion.weight !== undefined ? { weight: assertion.weight } : {}),
        };
    });
    // Optional baseline block, carrying only the settings that are present.
    let baselineBlock = {};
    if (task.baseline) {
        const { enabled, rubric } = task.baseline;
        baselineBlock = {
            baseline: {
                ...(enabled !== undefined ? { enabled } : {}),
                ...(rubric ? { rubric } : {}),
            },
        };
    }
    // Source file path: a best-effort guess derived from the feature area.
    const filePath = `.ailf/tasks/${task.featureArea}.yaml`;
    const origin = {
        branch: git.branch,
        commitSha: git.commitSha,
        contentHash,
        lastSyncedAt: new Date().toISOString(),
        path: filePath,
        repo: git.repo,
        repoName: git.name,
        repoOwner: git.owner,
        type: "repo",
    };
    return {
        _id: docId,
        _type: "ailf.task",
        assert: assertArray,
        canonicalDocs,
        description: task.description,
        docCoverage: task.docCoverage,
        featureArea: {
            _ref: `ailf.featureArea.${task.featureArea}`,
            _type: "reference",
        },
        id: { _type: "slug", current: task.id },
        origin,
        taskPrompt: task.taskPrompt,
        ...baselineBlock,
    };
}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * pipeline/plan-format.ts
3
+ *
4
+ * Formatters for rendering an ExecutionPlan to console output or JSON.
5
+ *
6
+ * The console formatter produces a rich, human-readable preview with
7
+ * emoji markers, alignment, and color-coding (via unicode markers).
8
+ * The JSON formatter produces machine-readable output for CI/CD.
9
+ *
10
+ * @see docs/exec-plans/active/execution-preview.md
11
+ */
12
+ import type { ExecutionPlan } from "./plan.js";
13
/**
 * Format an execution plan as a rich console string.
 *
 * Produces a multi-section output with emoji markers showing:
 * - Command summary (mode, source, flags)
 * - Step plan (which steps will run, cached, or skipped)
 * - Test/task summary
 * - Model list
 * - Cost estimate
 * - File I/O
 * - Comparison context
 * - Warnings/errors
 *
 * @param plan - The execution plan to render.
 * @returns The rendered text.
 */
export declare function formatPlanConsole(plan: ExecutionPlan): string;
27
/**
 * Format an execution plan as indented JSON.
 *
 * Useful for CI/CD integration, approval gates, and programmatic
 * inspection of the plan.
 *
 * @param plan - The execution plan to serialize.
 * @returns The JSON text.
 */
export declare function formatPlanJson(plan: ExecutionPlan): string;