@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530) hide show
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Shell delegation for the fetch-docs step.
3
+ *
4
+ * Isolates the execSync call so it can be replaced when the pipeline
5
+ * fully migrates to the DocFetcher port.
6
+ */
7
+ import { execSync } from "child_process";
8
/**
 * Run `pnpm fetch-docs` via shell.
 *
 * Returns a result object instead of throwing so the step can
 * handle the failure uniformly.
 *
 * The optional source value is interpolated into a shell command line, so it
 * is validated against a conservative allowlist first. Values containing
 * whitespace or shell metacharacters (`;`, `|`, `$`, backticks, quotes, ...)
 * or starting with `-` are rejected with a failure result and no command is
 * executed.
 *
 * @param {string} rootDir - Working directory the command runs in.
 * @param {string} [source] - Optional value forwarded as `--source <value>`.
 * @returns {{ ok: true } | { ok: false, error: string }} Outcome of the run.
 */
export function runFetchDocsShell(rootDir, source) {
    try {
        let sourceArg = "";
        if (source) {
            const sourceName = String(source);
            // Allowlist instead of quoting: shell quoting rules differ between
            // POSIX shells and cmd.exe, while this character set is inert in both.
            const isSafeSourceName = /^(?!-)[\w./:@-]+$/.test(sourceName);
            if (!isSafeSourceName) {
                return {
                    ok: false,
                    error: `Invalid source name: ${JSON.stringify(sourceName)}`,
                };
            }
            sourceArg = ` --source ${sourceName}`;
        }
        execSync(`pnpm fetch-docs${sourceArg}`, {
            cwd: rootDir,
            env: process.env,
            // Stream the child's output straight to the parent's terminal.
            stdio: "inherit",
        });
        return { ok: true };
    }
    catch (err) {
        return {
            ok: false,
            error: err instanceof Error ? err.message : String(err),
        };
    }
}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Pipeline step: Fetch documentation from Sanity CMS.
3
+ *
4
+ * Uses ctx.docFetcher (the DocFetcher port) for all fetching. The adapter
5
+ * handles GROQ queries, perspective diffing, document overlays, and URL
6
+ * fetching. This step orchestrates the call and writes metadata files.
7
+ */
8
+ import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
9
+ export declare class FetchDocsStep implements PipelineStep { // Type declaration for the fetch-docs pipeline step.
10
+ readonly name = "fetch-docs"; // Fixed step name.
11
+ check(): ValidationIssue[]; // The compiled implementation shown in this diff returns an empty array.
12
+ execute(ctx: AppContext): Promise<StepResult>; // Compiled implementation returns a "skipped" result when ctx.config.skipFetch is set, and "failed" when no loaded task has canonicalDocs.
13
+ cacheInputs(ctx: AppContext): string[]; // NOTE(review): presumably the inputs used for step caching; implementation not visible here, confirm.
14
+ }
@@ -0,0 +1,135 @@
1
+ /**
2
+ * Pipeline step: Fetch documentation from Sanity CMS.
3
+ *
4
+ * Uses ctx.docFetcher (the DocFetcher port) for all fetching. The adapter
5
+ * handles GROQ queries, perspective diffing, document overlays, and URL
6
+ * fetching. This step orchestrates the call and writes metadata files.
7
+ */
8
+ import { mkdirSync, writeFileSync } from "fs";
9
+ import { join } from "path";
10
+ import { getStepInputPaths } from "../../pipeline/cache.js";
11
+ import { checkCanonicalContextsExist } from "../../pipeline/checks.js";
12
+ import { loadSource } from "../../sources.js";
13
+ import { configToSourceOverrides } from "../config-to-source-overrides.js";
14
/**
 * Pipeline step that fetches canonical documentation through the DocFetcher
 * port (`ctx.docFetcher`) and writes the metadata files it returns.
 */
export class FetchDocsStep {
    name = "fetch-docs";
    /** No static preconditions; everything is verified inside execute(). */
    check() {
        return [];
    }
    /**
     * Load the selected tasks, resolve the source, run the fetch and verify
     * that canonical context files exist afterwards.
     *
     * Every failure (including a throwing task source or source loader) is
     * reported as a `failed` result rather than a rejected promise.
     *
     * @param ctx - Application context (config, taskSource, docFetcher).
     * @returns A step result with status "skipped", "failed" or "success".
     */
    async execute(ctx) {
        if (ctx.config.skipFetch) {
            return { status: "skipped", reason: "--skip-fetch" };
        }
        const start = Date.now();
        const fail = (error) => ({
            durationMs: Date.now() - start,
            error,
            status: "failed",
        });
        const describe = (err) => (err instanceof Error ? err.message : String(err));
        // Precondition: at least one task has canonical doc mappings
        const filter = buildFilter(ctx);
        let tasks;
        try {
            tasks = await ctx.taskSource.loadTasks(filter);
        }
        catch (err) {
            return fail(`TaskSource.loadTasks failed: ${describe(err)}`);
        }
        // Tolerate tasks that carry no canonicalDocs property at all.
        const tasksWithDocs = tasks.filter((t) => (t.canonicalDocs?.length ?? 0) > 0);
        if (tasksWithDocs.length === 0) {
            return fail("No tasks with canonical_docs found. Add canonical_docs to your task definitions.");
        }
        // Resolve source once with typed overrides
        let resolvedSource;
        try {
            const overrides = configToSourceOverrides(ctx.config);
            resolvedSource = loadSource(ctx.config.source, overrides);
        }
        catch (err) {
            return fail(`Source resolution failed: ${describe(err)}`);
        }
        // Log source info
        console.log(` Source: ${resolvedSource.name}`);
        console.log(` Base URL: ${resolvedSource.baseUrl}`);
        if (resolvedSource.perspective) {
            console.log(` Perspective: ${resolvedSource.perspective}`);
        }
        if ((resolvedSource.documentIds?.length ?? 0) > 0) {
            console.log(` Documents: ${resolvedSource.documentIds.length} document ID(s)`);
        }
        if ((resolvedSource.urls?.length ?? 0) > 0) {
            console.log(` URLs: ${resolvedSource.urls.length} direct URL(s)`);
        }
        // Precondition: docFetcher must be available
        if (!ctx.docFetcher) {
            return fail("DocFetcher port not available. Ensure composition root wires ctx.docFetcher.");
        }
        // Execute the fetch via the DocFetcher port
        try {
            const result = await ctx.docFetcher.fetch(tasksWithDocs, resolvedSource);
            // Write metadata files for downstream pipeline consumption
            if (result.metadata) {
                writeMetadataFiles(ctx.config.rootDir, result.metadata);
            }
        }
        catch (err) {
            return fail(`fetch-docs failed: ${describe(err)}`);
        }
        // Postcondition: canonical context files exist for all tasks
        const taskIds = tasksWithDocs.map((t) => t.id);
        const contextErrors = checkCanonicalContextsExist(ctx.config.rootDir, taskIds)
            .filter((i) => i.severity === "error");
        if (contextErrors.length > 0) {
            return fail(`Postcondition failed: ${contextErrors.map((e) => e.message).join("; ")}`);
        }
        return {
            durationMs: Date.now() - start,
            status: "success",
            summary: `Fetched canonical contexts for ${taskIds.length} tasks`,
        };
    }
    /** Cache inputs for this step, as reported by getStepInputPaths(). */
    cacheInputs(ctx) {
        return getStepInputPaths(ctx.config.rootDir, "fetch-docs");
    }
}
94
+ // ---------------------------------------------------------------------------
95
+ // Helpers
96
+ // ---------------------------------------------------------------------------
97
/**
 * Derive the task filter from `ctx.config`.
 *
 * @param ctx - Context whose config may carry `areas` and/or `tasks`.
 * @returns `undefined` when neither is set; otherwise an object holding
 *   `areas` and/or `taskIds` for whichever values are truthy.
 */
function buildFilter(ctx) {
    const { areas, tasks } = ctx.config;
    if (!(areas || tasks)) {
        return undefined;
    }
    const filter = {};
    if (areas) {
        filter.areas = areas;
    }
    if (tasks) {
        filter.taskIds = tasks;
    }
    return filter;
}
106
/**
 * Persist the metadata returned by DocFetcher as JSON files under
 * `<rootDir>/contexts/`, creating the directory if needed.
 *
 * Only the metadata sections that are present (truthy) are written; each
 * write is followed by a console message.
 *
 * @param rootDir - Project root directory.
 * @param metadata - Object that may contain `manifest`, `releaseImpact`,
 *   `documentOverlay` and `urlFetch`.
 */
function writeMetadataFiles(rootDir, metadata) {
    const contextsDir = join(rootDir, "contexts");
    mkdirSync(contextsDir, { recursive: true });
    // [metadata key, output file name, log-message builder], in write order.
    const outputs = [
        [
            "manifest",
            "document-manifest.json",
            (docs) => ` 📋 Document manifest: ${docs.length} docs → contexts/document-manifest.json`,
        ],
        [
            "releaseImpact",
            "release-impact.json",
            () => " 📄 Release impact written to contexts/release-impact.json",
        ],
        [
            "documentOverlay",
            "document-overlay.json",
            () => " 📄 Document overlay written to contexts/document-overlay.json",
        ],
        [
            "urlFetch",
            "url-fetch.json",
            () => " 📄 URL fetch metadata written to contexts/url-fetch.json",
        ],
    ];
    for (const [key, fileName, toMessage] of outputs) {
        const payload = metadata[key];
        if (!payload) {
            continue;
        }
        writeFileSync(join(contextsDir, fileName), JSON.stringify(payload, null, 2));
        console.log(toMessage(payload));
    }
}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Pipeline step: Gap analysis (failure modes + remediation plan).
3
+ *
4
+ * Wraps the inline gap-analysis logic from pipeline-action.ts behind
5
+ * the PipelineStep interface. This includes document manifest enrichment
6
+ * and low-scoring judgment extraction.
7
+ *
8
+ * This is an optional step — failure doesn't stop the pipeline.
9
+ */
10
+ import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
11
/**
 * Pipeline step performing gap analysis (failure modes + remediation plan).
 */
export declare class GapAnalysisStep implements PipelineStep {
    /** Identifier of this step. */
    readonly name = "gap-analysis";
    /** Marks the step as optional. */
    readonly optional = true;
    /** Returns the validation issues reported by this step. */
    check(ctx: AppContext): ValidationIssue[];
    /** Runs the step and resolves with its result. */
    execute(ctx: AppContext): Promise<StepResult>;
}
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Pipeline step: Gap analysis (failure modes + remediation plan).
3
+ *
4
+ * Wraps the inline gap-analysis logic from pipeline-action.ts behind
5
+ * the PipelineStep interface. This includes document manifest enrichment
6
+ * and low-scoring judgment extraction.
7
+ *
8
+ * This is an optional step — failure doesn't stop the pipeline.
9
+ */
10
+ import { existsSync, readFileSync, writeFileSync } from "fs";
11
+ import { join, resolve } from "path";
12
/**
 * Optional pipeline step: builds failure-mode and gap-analysis reports from
 * the latest grader judgments, writes them next to the results, and rewrites
 * score-summary.json enriched with document references and low-scoring
 * judgments.
 */
export class GapAnalysisStep {
    name = "gap-analysis";
    optional = true;
    /** Emits a warning when results/latest/grader-judgments.json is missing. */
    check(ctx) {
        const judgmentsFile = resolve(ctx.config.rootDir, "results", "latest", "grader-judgments.json");
        if (existsSync(judgmentsFile)) {
            return [];
        }
        return [
            {
                message: "No grader-judgments.json — run a full evaluation first",
                severity: "warning",
                source: "gap-analysis",
            },
        ];
    }
    /**
     * Run the analysis. Skips when either input file is missing; any error
     * raised during the analysis is returned as a `failed` result.
     */
    async execute(ctx) {
        const root = ctx.config.rootDir;
        const startedAt = Date.now();
        const judgmentsFile = resolve(root, "results", "latest", "grader-judgments.json");
        const summaryFile = resolve(root, "results", "latest", "score-summary.json");
        if (!existsSync(judgmentsFile)) {
            return {
                status: "skipped",
                reason: "No grader-judgments.json — run a full evaluation first",
            };
        }
        if (!existsSync(summaryFile)) {
            return { status: "skipped", reason: "No score-summary.json" };
        }
        try {
            const failureModes = await import("../../pipeline/failure-modes.js");
            const gapAnalysis = await import("../../pipeline/gap-analysis.js");
            const judgments = JSON.parse(readFileSync(judgmentsFile, "utf-8"));
            const scoreSummary = JSON.parse(readFileSync(summaryFile, "utf-8"));
            const failureModeReport = failureModes.buildFailureModeReport(judgments, scoreSummary.scores);
            console.log(failureModes.formatFailureModesConsole(failureModeReport));
            const gapReport = gapAnalysis.buildGapAnalysisReport(failureModeReport, scoreSummary.scores);
            console.log(gapAnalysis.formatGapAnalysisConsole(gapReport));
            const latestDir = resolve(root, "results", "latest");
            writeFileSync(join(latestDir, "failure-modes.json"), JSON.stringify(failureModeReport, null, 2));
            writeFileSync(join(latestDir, "gap-analysis.json"), JSON.stringify(gapReport, null, 2));
            // ── Document manifest + enrichment ─────────────────────────
            const { resolveMappings } = await import("../../pipeline/resolve-mappings.js");
            const mappings = resolveMappings(root);
            const manifestFile = resolve(root, "contexts", "document-manifest.json");
            let manifestEntries = [];
            if (existsSync(manifestFile)) {
                manifestEntries = JSON.parse(readFileSync(manifestFile, "utf-8"));
            }
            const entryBySlug = new Map();
            for (const manifestEntry of manifestEntries) {
                entryBySlug.set(manifestEntry.slug, manifestEntry);
            }
            // Slugs without a manifest entry (or with an empty _id) yield no ref.
            const resolveRefs = (slugs) => slugs.flatMap((slug) => {
                const found = entryBySlug.get(slug);
                if (!found) {
                    return [];
                }
                const ref = {
                    documentId: found._id,
                    revision: found._rev,
                    slug: found.slug,
                    title: found.title,
                };
                return ref.documentId !== "" ? [ref] : [];
            });
            const refsByDescription = new Map();
            const refsByArea = new Map();
            for (const [areaName, areaInfo] of Object.entries(mappings.feature_areas)) {
                const slugsInArea = new Set();
                for (const task of areaInfo.tasks) {
                    const slugsForTask = task.canonical_docs.map((doc) => doc.slug);
                    refsByDescription.set(task.description, resolveRefs(slugsForTask));
                    slugsForTask.forEach((slug) => slugsInArea.add(slug));
                }
                refsByArea.set(areaName, resolveRefs(Array.from(slugsInArea)));
            }
            const documentManifest = resolveRefs(Array.from(entryBySlug.keys()));
            const enrichedScores = scoreSummary.scores.map((entry) => {
                return { ...entry, documents: refsByArea.get(entry.feature) };
            });
            // ── Low-scoring judgments ────────────────────────────────────
            const LOW_SCORE_THRESHOLD = 70;
            const MAX_STORED_JUDGMENTS = 50;
            // Scores in (0, 1] are treated as fractions and scaled to 0–100.
            const toPercent = (score) => (score > 0 && score <= 1 ? Math.round(score * 100) : score);
            const normalized = judgments.map((judgment) => ({
                ...judgment,
                score: toPercent(judgment.score),
            }));
            const worstFirst = normalized
                .filter((judgment) => judgment.score < LOW_SCORE_THRESHOLD)
                .sort((left, right) => left.score - right.score);
            const lowScoringJudgments = worstFirst
                .slice(0, MAX_STORED_JUDGMENTS)
                .map((judgment) => {
                    // Strip a trailing "(gold)" / "(baseline)" marker before lookup.
                    const description = judgment.taskId.replace(/\s*\((gold|baseline)\)\s*$/, "");
                    const canonicalDocs = refsByDescription.get(description);
                    if (!canonicalDocs) {
                        return judgment;
                    }
                    return { ...judgment, canonicalDocs };
                });
            const enrichedSummary = {
                ...scoreSummary,
                documentManifest,
                failureModes: failureModeReport,
                lowScoringJudgments,
                recommendations: gapReport,
                scores: enrichedScores,
            };
            writeFileSync(summaryFile, JSON.stringify(enrichedSummary, null, 2));
            const gapCount = gapReport.gaps.length;
            const classRate = failureModeReport.classificationRate.toFixed(0);
            return {
                durationMs: Date.now() - startedAt,
                status: "success",
                summary: `${failureModeReport.totalJudgments} judgments analyzed (${classRate}% classified), ${gapCount} actionable gaps identified`,
            };
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            return {
                durationMs: Date.now() - startedAt,
                error: message,
                status: "failed",
            };
        }
    }
}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Pipeline step: Generate Promptfoo configuration files.
3
+ *
4
+ * Calls generateConfigs() from pipeline/generate-configs.ts with typed options
5
+ * derived from AppContext. No env bridge needed — source is resolved and
6
+ * passed directly.
7
+ */
8
+ import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
9
/**
 * Pipeline step that generates the Promptfoo configuration files.
 */
export declare class GenerateConfigsStep implements PipelineStep {
    /** Identifier of this step. */
    readonly name = "generate-configs";
    /** Returns the validation issues reported by this step. */
    check(ctx: AppContext): ValidationIssue[];
    /** Runs the step and resolves with its result. */
    execute(ctx: AppContext): Promise<StepResult>;
    /** Returns the list of cache inputs for this step. */
    cacheInputs(ctx: AppContext): string[];
}
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Pipeline step: Generate Promptfoo configuration files.
3
+ *
4
+ * Calls generateConfigs() from pipeline/generate-configs.ts with typed options
5
+ * derived from AppContext. No env bridge needed — source is resolved and
6
+ * passed directly.
7
+ */
8
+ import { getStepInputPaths } from "../../pipeline/cache.js";
9
+ import { checkGeneratedConfigsExist } from "../../pipeline/checks.js";
10
+ import { generateConfigs } from "../../pipeline/generate-configs.js";
11
+ import { validateModelsYaml } from "../../pipeline/validate.js";
12
+ import { loadSource } from "../../sources.js";
13
+ import { configToSourceOverrides } from "../config-to-source-overrides.js";
14
/**
 * Pipeline step that generates the Promptfoo configuration files by calling
 * generateConfigs() with options derived from the application context.
 */
export class GenerateConfigsStep {
    name = "generate-configs";
    /** Returns only the error-severity issues from validateModelsYaml(). */
    check(ctx) {
        const issues = validateModelsYaml(ctx.config.rootDir);
        return issues.filter((i) => i.severity === "error");
    }
    /**
     * Resolve the source, load tasks, generate the configs and verify that
     * the config files exist afterwards.
     *
     * Every failure (including a throwing source loader) is reported as a
     * `failed` result rather than a rejected promise.
     *
     * @param ctx - Application context (config, taskSource).
     * @returns A step result with status "failed" or "success".
     */
    async execute(ctx) {
        const start = Date.now();
        const { config } = ctx;
        const fail = (error) => ({
            durationMs: Date.now() - start,
            error,
            status: "failed",
        });
        const describe = (err) => (err instanceof Error ? err.message : String(err));
        // Single definition of the selection filter; returns a fresh object on
        // each call so the two consumers never share a mutable instance.
        const makeFilter = () => config.areas || config.tasks
            ? { areas: config.areas, taskIds: config.tasks }
            : undefined;
        // Resolve source once with typed overrides
        let resolvedSource;
        try {
            const overrides = configToSourceOverrides(config);
            resolvedSource = config.source
                ? loadSource(config.source, overrides)
                : undefined;
        }
        catch (err) {
            return fail(`Source resolution failed: ${describe(err)}`);
        }
        // Load tasks via the TaskSource port — this picks up Content Lake,
        // repo-based, and YAML tasks depending on which adapter is wired.
        let tasks;
        try {
            tasks = await ctx.taskSource.loadTasks(makeFilter());
        }
        catch (err) {
            return fail(`TaskSource.loadTasks failed: ${describe(err)}`);
        }
        try {
            generateConfigs({
                allowedOrigins: config.allowedOrigins,
                filter: makeFilter(),
                resolvedSource,
                rootDir: config.rootDir,
                searchMode: config.searchMode,
                source: config.source,
                tasks,
            });
        }
        catch (err) {
            return fail(`generate-configs failed: ${describe(err)}`);
        }
        // Postcondition: config files exist
        const configErrors = checkGeneratedConfigsExist(config.rootDir)
            .filter((i) => i.severity === "error");
        if (configErrors.length > 0) {
            return fail(`Postcondition failed: ${configErrors.map((e) => e.message).join("; ")}`);
        }
        return {
            durationMs: Date.now() - start,
            status: "success",
            summary: "Generated promptfoo config files",
        };
    }
    /** Cache inputs for this step, as reported by getStepInputPaths(). */
    cacheInputs(ctx) {
        return getStepInputPaths(ctx.config.rootDir, "generate-configs");
    }
}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Pipeline step: Grader consistency analysis.
3
+ *
4
+ * Calls pipeline/grader-consistency-runner.ts directly with typed options.
5
+ * No env bridge needed — all parameters are passed directly.
6
+ */
7
+ import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
8
/**
 * Pipeline step running the grader consistency analysis.
 */
export declare class GraderConsistencyStep implements PipelineStep {
    /** Identifier of this step. */
    readonly name = "grader-consistency";
    /** Marks the step as optional. */
    readonly optional = true;
    /** Returns the validation issues reported by this step. */
    check(): ValidationIssue[];
    /** Runs the step and resolves with its result. */
    execute(ctx: AppContext): Promise<StepResult>;
}
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Pipeline step: Grader consistency analysis.
3
+ *
4
+ * Calls pipeline/grader-consistency-runner.ts directly with typed options.
5
+ * No env bridge needed — all parameters are passed directly.
6
+ */
7
+ import { existsSync } from "fs";
8
+ import { resolve } from "path";
9
+ import { checkResultsExist } from "../../pipeline/checks.js";
10
+ import { RESULTS_FILES } from "../../pipeline/eval-constants.js";
11
+ import { runGraderConsistency } from "../../pipeline/grader-consistency-runner.js";
12
/**
 * Optional pipeline step: runs the grader consistency analysis against the
 * results file of the selected mode and verifies that the output was written.
 */
export class GraderConsistencyStep {
    name = "grader-consistency";
    optional = true;
    /** No static preconditions; the results file is checked in execute(). */
    check() {
        return [];
    }
    /**
     * Run the analysis.
     *
     * @param ctx - Application context; reads `graderReplications` (default 5),
     *   `mode` and `rootDir` from `ctx.config`.
     * @returns A step result with status "failed" or "success".
     */
    async execute(ctx) {
        const startedAt = Date.now();
        const failed = (error) => ({
            durationMs: Date.now() - startedAt,
            error,
            status: "failed",
        });
        const replications = ctx.config.graderReplications ?? 5;
        // "full" has no results file of its own; it reuses the baseline one.
        let concreteMode = ctx.config.mode;
        if (concreteMode === "full") {
            concreteMode = "baseline";
        }
        const resultsFile = RESULTS_FILES[concreteMode];
        // Precondition: results file exists
        const missing = checkResultsExist(ctx.config.rootDir, resultsFile)
            .filter((issue) => issue.severity === "error");
        if (missing.length > 0) {
            const details = missing.map((issue) => issue.message).join("; ");
            return failed(`Results missing: ${details}. Run eval first.`);
        }
        try {
            await runGraderConsistency({
                replications,
                resultsPath: resolve(ctx.config.rootDir, resultsFile),
                rootDir: ctx.config.rootDir,
            });
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return failed(`grader-consistency failed: ${reason}`);
        }
        // Postcondition: output file exists
        const outputPath = resolve(ctx.config.rootDir, "results", "latest", "grader-consistency.json");
        if (!existsSync(outputPath)) {
            return failed("grader-consistency.json was not created");
        }
        return {
            durationMs: Date.now() - startedAt,
            status: "success",
            summary: `Grader consistency analysis complete (${replications} replications)`,
        };
    }
}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * PipelineStep implementations — wrappers around existing step functions.
3
+ *
4
+ * Each class delegates to the legacy step function, reading config from
5
+ * AppContext instead of positional parameters.
6
+ */
7
+ export { CalculateScoresStep } from "./calculate-scores-step.js";
8
+ export { CompareStep } from "./compare-step.js";
9
+ export { DiscoveryReportStep } from "./discovery-report-step.js";
10
+ export { FetchDocsStep } from "./fetch-docs-step.js";
11
+ export { GapAnalysisStep } from "./gap-analysis-step.js";
12
+ export { GenerateConfigsStep } from "./generate-configs-step.js";
13
+ export { MirrorRepoTasksStep } from "./mirror-repo-tasks-step.js";
14
+ export { GraderConsistencyStep } from "./grader-consistency-step.js";
15
+ export { PublishReportStep } from "./publish-report-step.js";
16
+ export { ReadinessStep } from "./readiness-step.js";
17
+ export { ReportStep } from "./report-step.js";
18
+ export { RunEvalStep } from "./run-eval-step.js";
19
+ export { ValidateStep } from "./validate-step.js";
@@ -0,0 +1,19 @@
1
+ /**
2
+ * PipelineStep implementations — wrappers around existing step functions.
3
+ *
4
+ * Each class delegates to the legacy step function, reading config from
5
+ * AppContext instead of positional parameters.
6
+ */
7
+ export { CalculateScoresStep } from "./calculate-scores-step.js";
8
+ export { CompareStep } from "./compare-step.js";
9
+ export { DiscoveryReportStep } from "./discovery-report-step.js";
10
+ export { FetchDocsStep } from "./fetch-docs-step.js";
11
+ export { GapAnalysisStep } from "./gap-analysis-step.js";
12
+ export { GenerateConfigsStep } from "./generate-configs-step.js";
13
+ export { MirrorRepoTasksStep } from "./mirror-repo-tasks-step.js";
14
+ export { GraderConsistencyStep } from "./grader-consistency-step.js";
15
+ export { PublishReportStep } from "./publish-report-step.js";
16
+ export { ReadinessStep } from "./readiness-step.js";
17
+ export { ReportStep } from "./report-step.js";
18
+ export { RunEvalStep } from "./run-eval-step.js";
19
+ export { ValidateStep } from "./validate-step.js";
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Pipeline step: Mirror repo-based tasks to the Content Lake.
3
+ *
4
+ * When --repo-tasks-path is provided, this step upserts mirror documents
5
+ * in the Sanity Content Lake for all repo-sourced tasks. This makes
6
+ * repo tasks visible in Studio with full provenance tracking.
7
+ *
8
+ * The step is optional — mirror failure does not block the pipeline.
9
+ * It runs after validate and before fetch-docs so mirror documents
10
+ * exist before evaluation begins.
11
+ *
12
+ * @see packages/eval/src/pipeline/mirror-repo-tasks.ts
13
+ * @see docs/exec-plans/completed/tasks-as-content/phase-5-content-lake-mirroring.md
14
+ */
15
+ import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
16
/**
 * Pipeline step that mirrors repo-based tasks to the Content Lake.
 */
export declare class MirrorRepoTasksStep implements PipelineStep {
    /** Identifier of this step. */
    readonly name = "mirror-repo-tasks";
    /** Marks the step as optional. */
    readonly optional = true;
    /** Returns the validation issues reported by this step. */
    check(_ctx: AppContext): ValidationIssue[];
    /** Runs the step and resolves with its result. */
    execute(ctx: AppContext): Promise<StepResult>;
}
@@ -0,0 +1,94 @@
1
+ /**
2
+ * Pipeline step: Mirror repo-based tasks to the Content Lake.
3
+ *
4
+ * When --repo-tasks-path is provided, this step upserts mirror documents
5
+ * in the Sanity Content Lake for all repo-sourced tasks. This makes
6
+ * repo tasks visible in Studio with full provenance tracking.
7
+ *
8
+ * The step is optional — mirror failure does not block the pipeline.
9
+ * It runs after validate and before fetch-docs so mirror documents
10
+ * exist before evaluation begins.
11
+ *
12
+ * @see packages/eval/src/pipeline/mirror-repo-tasks.ts
13
+ * @see docs/exec-plans/completed/tasks-as-content/phase-5-content-lake-mirroring.md
14
+ */
15
+ import { getSanityClient } from "../../sanity/client.js";
16
+ import { detectGitContext, mirrorRepoTasks, } from "../../pipeline/mirror-repo-tasks.js";
17
/**
 * Optional pipeline step: upserts mirror documents in the Content Lake for
 * the tasks found under `ctx.config.repoTasksPath`.
 */
export class MirrorRepoTasksStep {
    name = "mirror-repo-tasks";
    optional = true;
    /** No hard preconditions; a missing repo-tasks path is handled in execute(). */
    check(_ctx) {
        return [];
    }
    /**
     * Mirror the repo tasks. Skips when no repo-tasks path or write token is
     * available. Errors are logged as warnings and still reported with status
     * "success", because the mirror is non-blocking.
     */
    async execute(ctx) {
        const startedAt = Date.now();
        const { repoTasksPath } = ctx.config;
        // Skip if no repo tasks configured
        if (!repoTasksPath) {
            return { status: "skipped", reason: "No --repo-tasks-path provided" };
        }
        // Need a write token for mirroring
        const token = process.env.AILF_REPORT_SANITY_API_TOKEN ?? process.env.SANITY_API_TOKEN;
        if (!token) {
            return {
                status: "skipped",
                reason: "No write token available for Content Lake mirroring",
            };
        }
        const succeeded = (summary) => ({
            durationMs: Date.now() - startedAt,
            status: "success",
            summary,
        });
        try {
            // Only the repo tasks are wanted here (not the Content Lake ones),
            // so they are loaded through a fresh RepoTaskSource instance.
            const { RepoTaskSource } = await import("../../adapters/task-sources/repo-task-source.js");
            const repoTasks = await new RepoTaskSource(repoTasksPath).loadTasks();
            if (repoTasks.length === 0) {
                return succeeded("No repo tasks to mirror");
            }
            // Detect git context (from env vars or git CLI)
            const git = await detectGitContext(repoTasksPath);
            ctx.logger.info(` Mirroring ${repoTasks.length} repo task(s) from ${git.repo}@${git.branch}`);
            // Run the mirror with a client that has write access
            const outcome = await mirrorRepoTasks({
                client: getSanityClient({ token }),
                tasks: repoTasks,
                git,
            });
            // Log results
            if (outcome.areasCreated.length > 0) {
                ctx.logger.info(` Auto-created feature areas: ${outcome.areasCreated.join(", ")}`);
            }
            if (outcome.unresolvedSlugs.length > 0) {
                ctx.logger.warn(` Unresolved canonical doc slugs: ${outcome.unresolvedSlugs.join(", ")}`);
            }
            for (const mirrorError of outcome.errors) {
                ctx.logger.warn(` Mirror error: ${mirrorError}`);
            }
            let errorSuffix = "";
            if (outcome.errors.length > 0) {
                errorSuffix = ` (${outcome.errors.length} error(s))`;
            }
            return succeeded(`Mirrored ${outcome.upserted} task(s), skipped ${outcome.skipped} unchanged${errorSuffix}`);
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            ctx.logger.warn(`Mirror step failed (non-blocking): ${msg}`);
            return succeeded(`Mirror failed (non-blocking): ${msg}`);
        }
    }
}