@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530)
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Pipeline step: Publish evaluation report to the report store.
3
+ *
4
+ * Inlines the logic from the former pipeline/steps/publish-report-step.ts.
5
+ * Uses ctx.reportStore and ctx.sinks from the composition root instead of
6
+ * constructing ReportStore and loadSinks() internally.
7
+ *
8
+ * Design principles:
9
+ * - P1: Reports are immutable events (write-once to Sanity)
10
+ * - P5: Local-first (pipeline never fails because of a store write)
11
+ * - P6: Sinks are fire-and-forget (failures logged, not thrown)
12
+ */
13
+ import type { AppContext, PipelineState, PipelineStep, PromptfooUrlEntry, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
14
+ export declare class PublishReportStep implements PipelineStep {
15
+ private readonly pipelineStart;
16
+ private readonly options;
17
+ readonly name = "publish-report";
18
+ readonly optional = true;
19
+ constructor(pipelineStart: number, options?: {
20
+ evalFingerprint?: string;
21
+ promptfooUrls?: PromptfooUrlEntry[];
22
+ publishTag?: string;
23
+ });
24
+ check(): ValidationIssue[];
25
+ execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
26
+ }
@@ -0,0 +1,216 @@
1
+ /**
2
+ * Pipeline step: Publish evaluation report to the report store.
3
+ *
4
+ * Inlines the logic from the former pipeline/steps/publish-report-step.ts.
5
+ * Uses ctx.reportStore and ctx.sinks from the composition root instead of
6
+ * constructing ReportStore and loadSinks() internally.
7
+ *
8
+ * Design principles:
9
+ * - P1: Reports are immutable events (write-once to Sanity)
10
+ * - P5: Local-first (pipeline never fails because of a store write)
11
+ * - P6: Sinks are fire-and-forget (failures logged, not thrown)
12
+ */
13
+ import { readFileSync } from "fs";
14
+ import { resolve } from "path";
15
+ import { checkScoreSummaryValid } from "../../pipeline/checks.js";
16
+ import { buildProvenance, } from "../../pipeline/provenance.js";
17
+ import { generateReportId } from "../../report-store.js";
18
+ import { withRetry } from "../../sinks/retry.js";
19
+ export class PublishReportStep {
20
+ pipelineStart;
21
+ options;
22
+ name = "publish-report";
23
+ optional = true;
24
+ constructor(pipelineStart, options = {}) {
25
+ this.pipelineStart = pipelineStart;
26
+ this.options = options;
27
+ }
28
+ check() {
29
+ return [];
30
+ }
31
+ async execute(ctx, state) {
32
+ const start = Date.now();
33
+ const { rootDir } = ctx.config;
34
+ // Precondition: score summary exists
35
+ const summaryIssues = checkScoreSummaryValid(rootDir);
36
+ const summaryErrors = summaryIssues.filter((i) => i.severity === "error");
37
+ if (summaryErrors.length > 0) {
38
+ return {
39
+ durationMs: Date.now() - start,
40
+ error: `Score summary missing: ${summaryErrors.map((e) => e.message).join("; ")}`,
41
+ status: "failed",
42
+ };
43
+ }
44
+ // Read score summary
45
+ let summary;
46
+ try {
47
+ const summaryPath = resolve(rootDir, "results", "latest", "score-summary.json");
48
+ summary = JSON.parse(readFileSync(summaryPath, "utf-8"));
49
+ }
50
+ catch (err) {
51
+ return {
52
+ durationMs: Date.now() - start,
53
+ error: `Failed to read score-summary.json: ${err instanceof Error ? err.message : String(err)}`,
54
+ status: "failed",
55
+ };
56
+ }
57
+ // Build provenance — prefer state values from upstream steps,
58
+ // fall back to constructor options for backward compatibility
59
+ const provenanceOptions = {
60
+ evalFingerprint: state.evalFingerprint ?? this.options.evalFingerprint,
61
+ promptfooUrls: state.promptfooUrls ?? this.options.promptfooUrls,
62
+ };
63
+ const provenanceInput = buildProvenanceInput(summary, ctx, provenanceOptions);
64
+ const provenance = buildProvenance(provenanceInput);
65
+ // Create report
66
+ const now = new Date().toISOString();
67
+ const reportId = generateReportId();
68
+ const durationMs = Date.now() - this.pipelineStart;
69
+ // Auto-compare against most recent comparable baseline
70
+ const comparison = ctx.reportStore
71
+ ? (await ctx.reportStore.autoCompare(summary, provenance, now))
72
+ : null;
73
+ const report = {
74
+ comparison: comparison ?? undefined,
75
+ completedAt: now,
76
+ durationMs,
77
+ id: reportId,
78
+ provenance,
79
+ summary,
80
+ tag: this.options.publishTag ?? ctx.config.publishTag,
81
+ };
82
+ // Share reportId with downstream steps (CallbackStep + orchestrator job update)
83
+ state.reportId = reportId;
84
+ // Write to store (system of record — best-effort, P5)
85
+ const sanityResult = ctx.reportStore
86
+ ? await ctx.reportStore.write(report)
87
+ : null;
88
+ // Run sinks (fire-and-forget, P6)
89
+ const publishResult = await runSinks(report, ctx);
90
+ // Build result summary
91
+ const parts = [];
92
+ if (sanityResult) {
93
+ parts.push(`report:${sanityResult}`);
94
+ }
95
+ else {
96
+ parts.push("Sanity write skipped (no token or unreachable)");
97
+ }
98
+ if (comparison) {
99
+ const delta = comparison.deltas.overall;
100
+ const sign = delta >= 0 ? "+" : "";
101
+ parts.push(`vs baseline: ${sign}${delta.toFixed(1)}`);
102
+ }
103
+ if (publishResult.sinkResults.length > 0) {
104
+ const succeeded = publishResult.sinkResults.filter((r) => r.result.status === "success").length;
105
+ const total = publishResult.sinkResults.length;
106
+ parts.push(`sinks: ${succeeded}/${total}`);
107
+ }
108
+ return {
109
+ durationMs: Date.now() - start,
110
+ status: "success",
111
+ summary: `Published — ${parts.join(", ")}`,
112
+ };
113
+ }
114
+ }
115
+ // ---------------------------------------------------------------------------
116
+ // Helpers
117
+ // ---------------------------------------------------------------------------
118
+ /**
119
+ * Assemble provenance input from the score summary and pipeline context.
120
+ */
121
+ function buildProvenanceInput(summary, ctx, options) {
122
+ const areas = summary.scores.map((s) => s.feature);
123
+ const mode = ctx.config.mode;
124
+ // Read document IDs from config
125
+ const sanityDocumentIds = ctx.config.sanityDocumentArgs;
126
+ // Read task filter from config
127
+ const taskIds = ctx.config.tasks;
128
+ // Build source from summary metadata or config
129
+ const source = {
130
+ baseUrl: summary.source?.baseUrl ?? "https://www.sanity.io/docs",
131
+ dataset: summary.source?.dataset ?? ctx.config.datasetOverride ?? "next",
132
+ documentIds: [],
133
+ llmsTxt: (summary.source?.baseUrl ?? "https://www.sanity.io/docs") + "/llms.txt",
134
+ name: summary.source?.name ?? "production",
135
+ perspective: summary.source?.perspective ??
136
+ ctx.config.perspectiveOverride ??
137
+ undefined,
138
+ priorityDomain: "sanity.io",
139
+ projectId: summary.source?.projectId ?? ctx.config.projectIdOverride ?? "3do82whm",
140
+ studioOrigin: "https://admin.sanity.io",
141
+ urls: [],
142
+ };
143
+ // Debug runs don't store fingerprints
144
+ const evalFingerprint = !ctx.config.debug?.enabled
145
+ ? options.evalFingerprint
146
+ : undefined;
147
+ return {
148
+ areas,
149
+ evalFingerprint,
150
+ mode,
151
+ promptfooUrls: options.promptfooUrls,
152
+ rootDir: ctx.config.rootDir,
153
+ sanityDocumentIds,
154
+ source,
155
+ taskIds,
156
+ };
157
+ }
158
+ /**
159
+ * Fan out a report to all configured sinks.
160
+ *
161
+ * Uses ctx.sinks from the composition root instead of loadSinks().
162
+ * Each sink is run with retry logic (3 attempts, exponential backoff).
163
+ * Failures are logged but never block the pipeline.
164
+ */
165
+ async function runSinks(report, ctx) {
166
+ const sinks = (ctx.sinks ?? []);
167
+ const sinkResults = [];
168
+ if (sinks.length === 0) {
169
+ return { report, sinkResults };
170
+ }
171
+ // Health check all sinks first (non-blocking)
172
+ for (const sink of sinks) {
173
+ if (sink.healthCheck) {
174
+ try {
175
+ const health = await sink.healthCheck();
176
+ if (!health.healthy) {
177
+ console.warn(` ⚠️ Sink ${sink.name} health check failed: ${health.reason}`);
178
+ }
179
+ }
180
+ catch (err) {
181
+ console.warn(` ⚠️ Sink ${sink.name} health check error: ${err instanceof Error ? err.message : String(err)}`);
182
+ }
183
+ }
184
+ }
185
+ // Publish to all sinks in parallel (fire-and-forget with retries)
186
+ const settled = await Promise.allSettled(sinks.map(async (sink) => {
187
+ const result = await withRetry(() => sink.publish(report));
188
+ return { name: sink.name, result };
189
+ }));
190
+ for (const outcome of settled) {
191
+ if (outcome.status === "fulfilled") {
192
+ sinkResults.push(outcome.value);
193
+ const { name, result } = outcome.value;
194
+ if (result.status === "failed") {
195
+ console.warn(` ⚠️ Sink ${name} failed: ${result.error}`);
196
+ }
197
+ else if (result.status === "skipped") {
198
+ console.log(` ⏭️ Sink ${name} skipped: ${result.reason}`);
199
+ }
200
+ else {
201
+ console.log(` ✅ Sink ${name} delivered${result.detail ? ` (${result.detail})` : ""}`);
202
+ }
203
+ }
204
+ else {
205
+ const error = outcome.reason instanceof Error
206
+ ? outcome.reason.message
207
+ : String(outcome.reason);
208
+ sinkResults.push({
209
+ name: "unknown",
210
+ result: { error, status: "failed" },
211
+ });
212
+ console.warn(` ⚠️ Sink delivery error: ${error}`);
213
+ }
214
+ }
215
+ return { report, sinkResults };
216
+ }
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Pipeline step: Launch readiness report.
3
+ *
4
+ * Calls pure functions from pipeline/readiness-report.ts directly.
5
+ * Optional step — failure doesn't stop the pipeline.
6
+ */
7
+ import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
8
/**
 * Pipeline step that produces the launch readiness report.
 *
 * Optional step — failure doesn't stop the pipeline.
 */
export declare class ReadinessStep implements PipelineStep {
    /** Step identifier. */
    readonly name: "readiness";
    /** Marks this step as optional. */
    readonly optional: true;
    /** Static precondition check. */
    check(): ValidationIssue[];
    /**
     * Generates the readiness report for the configured (or all scored) areas.
     *
     * @param ctx - Application context (config and logger).
     */
    execute(ctx: AppContext): Promise<StepResult>;
}
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Pipeline step: Launch readiness report.
3
+ *
4
+ * Calls pure functions from pipeline/readiness-report.ts directly.
5
+ * Optional step — failure doesn't stop the pipeline.
6
+ */
7
+ import { existsSync, readFileSync, writeFileSync } from "fs";
8
+ import { resolve } from "path";
9
+ import { load } from "js-yaml";
10
+ import { formatReadinessMarkdown, generateReadinessReport, } from "../../pipeline/readiness-report.js";
11
+ import { ThresholdConfigSchema } from "../../pipeline/schemas.js";
12
+ export class ReadinessStep {
13
+ name = "readiness";
14
+ optional = true;
15
+ check() {
16
+ return [];
17
+ }
18
+ async execute(ctx) {
19
+ const root = ctx.config.rootDir;
20
+ const start = Date.now();
21
+ try {
22
+ const scoreSummaryPath = resolve(root, "results", "latest", "score-summary.json");
23
+ const thresholdsPath = resolve(root, "config", "thresholds.yaml");
24
+ if (!existsSync(scoreSummaryPath)) {
25
+ return {
26
+ durationMs: Date.now() - start,
27
+ error: "score-summary.json not found",
28
+ status: "failed",
29
+ };
30
+ }
31
+ if (!existsSync(thresholdsPath)) {
32
+ return {
33
+ durationMs: Date.now() - start,
34
+ error: "config/thresholds.yaml not found",
35
+ status: "failed",
36
+ };
37
+ }
38
+ const scoreSummary = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
39
+ const rawThresholds = load(readFileSync(thresholdsPath, "utf-8"));
40
+ const thresholdConfig = ThresholdConfigSchema.parse(rawThresholds);
41
+ const gapPath = resolve(root, "results", "latest", "gap-analysis.json");
42
+ const gapAnalysis = existsSync(gapPath)
43
+ ? JSON.parse(readFileSync(gapPath, "utf-8"))
44
+ : undefined;
45
+ const readinessAreas = ctx.config.areas ?? scoreSummary.scores.map((s) => s.feature);
46
+ const readinessLines = [];
47
+ for (const area of readinessAreas) {
48
+ const areaScore = scoreSummary.scores.find((s) => s.feature === area);
49
+ if (!areaScore) {
50
+ ctx.logger.warn(`Area "${area}" not found in scores — skipping`);
51
+ continue;
52
+ }
53
+ const report = generateReadinessReport({
54
+ area,
55
+ gapAnalysis,
56
+ scoreSummary,
57
+ thresholdConfig,
58
+ });
59
+ const md = formatReadinessMarkdown(report);
60
+ readinessLines.push(md);
61
+ console.log(md);
62
+ }
63
+ if (readinessLines.length > 0) {
64
+ writeFileSync(resolve(root, "results", "latest", "readiness-report.md"), readinessLines.join("\n---\n\n"));
65
+ }
66
+ const passCount = readinessAreas.filter((area) => {
67
+ const areaScore = scoreSummary.scores.find((s) => s.feature === area);
68
+ if (!areaScore)
69
+ return false;
70
+ const report = generateReadinessReport({
71
+ area,
72
+ scoreSummary,
73
+ thresholdConfig,
74
+ });
75
+ return report.pass;
76
+ }).length;
77
+ return {
78
+ durationMs: Date.now() - start,
79
+ status: "success",
80
+ summary: `${passCount}/${readinessAreas.length} areas ready`,
81
+ };
82
+ }
83
+ catch (err) {
84
+ return {
85
+ durationMs: Date.now() - start,
86
+ error: err instanceof Error ? err.message : String(err),
87
+ status: "failed",
88
+ };
89
+ }
90
+ }
91
+ }
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Pipeline step: Generate PR comment / report from scores.
3
+ *
4
+ * Calls generatePrComment() from pipeline/pr-comment.ts with typed options.
5
+ * No env bridge or process.argv manipulation needed.
6
+ */
7
+ import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
8
/**
 * Pipeline step that generates the PR comment / report from the scores.
 */
export declare class ReportStep implements PipelineStep {
    /** Step identifier. */
    readonly name: "report";
    /** Static precondition check. */
    check(): ValidationIssue[];
    /**
     * Writes the PR comment to the configured output path (or the default
     * path under the root directory).
     *
     * @param ctx - Application context; only `ctx.config` is consulted.
     */
    execute(ctx: AppContext): Promise<StepResult>;
}
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Pipeline step: Generate PR comment / report from scores.
3
+ *
4
+ * Calls generatePrComment() from pipeline/pr-comment.ts with typed options.
5
+ * No env bridge or process.argv manipulation needed.
6
+ */
7
+ import { resolve } from "path";
8
+ import { checkScoreSummaryValid } from "../../pipeline/checks.js";
9
+ import { generatePrComment } from "../../pipeline/pr-comment.js";
10
+ const DEFAULT_REPORT_PATH = "results/latest/pr-comment.md";
11
+ export class ReportStep {
12
+ name = "report";
13
+ check() {
14
+ return [];
15
+ }
16
+ async execute(ctx) {
17
+ const start = Date.now();
18
+ // Precondition: score summary exists
19
+ const summaryIssues = checkScoreSummaryValid(ctx.config.rootDir);
20
+ const summaryErrors = summaryIssues.filter((i) => i.severity === "error");
21
+ if (summaryErrors.length > 0) {
22
+ return {
23
+ durationMs: Date.now() - start,
24
+ error: `Score summary missing: ${summaryErrors.map((e) => e.message).join("; ")}`,
25
+ status: "failed",
26
+ };
27
+ }
28
+ const resolvedOutput = ctx.config.outputPath ?? resolve(ctx.config.rootDir, DEFAULT_REPORT_PATH);
29
+ try {
30
+ generatePrComment({
31
+ outputPath: resolvedOutput,
32
+ promptfooUrl: ctx.config.promptfooUrl,
33
+ rootDir: ctx.config.rootDir,
34
+ });
35
+ }
36
+ catch (err) {
37
+ return {
38
+ durationMs: Date.now() - start,
39
+ error: `pr-comment failed: ${err instanceof Error ? err.message : String(err)}`,
40
+ status: "failed",
41
+ };
42
+ }
43
+ return {
44
+ durationMs: Date.now() - start,
45
+ status: "success",
46
+ summary: `Report written to ${resolvedOutput}`,
47
+ };
48
+ }
49
+ }
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Pipeline step: Run Promptfoo evaluation.
3
+ *
4
+ * Uses ctx.evalRunner (the EvalRunner port) for the actual Promptfoo
5
+ * invocation. Builds a clean env object for the subprocess instead of
6
+ * polluting global process.env.
7
+ */
8
+ import type { ConcreteEvalMode } from "../../_vendor/ailf-shared/index.d.ts";
9
+ import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
10
/**
 * Pipeline step that runs the Promptfoo evaluation for one concrete mode.
 */
export declare class RunEvalStep implements PipelineStep {
    /** Step identifier, derived from the mode as `eval-<mode>`. */
    readonly name: string;
    private readonly mode;
    /**
     * @param mode - Evaluation mode this step instance runs.
     */
    constructor(mode: ConcreteEvalMode);
    /** Static precondition check. */
    check(): ValidationIssue[];
    /**
     * Runs the evaluation through `ctx.evalRunner`.
     *
     * @param ctx - Application context.
     * @param state - Shared pipeline state; may receive `evalFingerprint`.
     */
    execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
    /**
     * Paths whose contents act as cache inputs for this step.
     *
     * @param ctx - Application context; `ctx.config.rootDir` is used.
     */
    cacheInputs(ctx: AppContext): string[];
}
@@ -0,0 +1,195 @@
1
+ /**
2
+ * Pipeline step: Run Promptfoo evaluation.
3
+ *
4
+ * Uses ctx.evalRunner (the EvalRunner port) for the actual Promptfoo
5
+ * invocation. Builds a clean env object for the subprocess instead of
6
+ * polluting global process.env.
7
+ */
8
+ import { existsSync, mkdirSync, writeFileSync } from "fs";
9
+ import { resolve } from "path";
10
+ import { getStepInputPaths } from "../../pipeline/cache.js";
11
+ import { checkCanonicalContextsExist, checkGeneratedConfigsExist, checkResultsExist, } from "../../pipeline/checks.js";
12
+ import { computeEvalFingerprint } from "../../pipeline/eval-fingerprint.js";
13
+ import { buildFilterFlags, CONFIG_FILES, RESULTS_FILES, scanResultsForErrors, } from "../../pipeline/eval-constants.js";
14
+ export class RunEvalStep {
15
+ mode;
16
+ name;
17
+ constructor(mode) {
18
+ this.mode = mode;
19
+ this.name = `eval-${mode}`;
20
+ }
21
+ check() {
22
+ return [];
23
+ }
24
+ async execute(ctx, state) {
25
+ if (ctx.config.skipEval) {
26
+ return { status: "skipped", reason: "--skip-eval" };
27
+ }
28
+ const start = Date.now();
29
+ const { rootDir, debug, concurrency, noCache } = ctx.config;
30
+ // Precondition: config file exists
31
+ const configIssues = checkGeneratedConfigsExist(rootDir);
32
+ const configErrors = configIssues.filter((i) => i.severity === "error");
33
+ if (configErrors.length > 0) {
34
+ return {
35
+ durationMs: Date.now() - start,
36
+ error: `Config files missing: ${configErrors.map((e) => e.message).join("; ")}`,
37
+ status: "failed",
38
+ };
39
+ }
40
+ // Precondition: canonical context files exist for filtered tasks.
41
+ // Must apply the same area/task filter as fetch-docs so we only
42
+ // check contexts that were actually fetched.
43
+ const filter = ctx.config.areas || ctx.config.tasks
44
+ ? {
45
+ ...(ctx.config.areas ? { areas: ctx.config.areas } : {}),
46
+ ...(ctx.config.tasks ? { taskIds: ctx.config.tasks } : {}),
47
+ }
48
+ : undefined;
49
+ const tasks = await ctx.taskSource.loadTasks(filter);
50
+ const taskIds = tasks.map((t) => t.id);
51
+ const contextIssues = checkCanonicalContextsExist(rootDir, taskIds);
52
+ const contextErrors = contextIssues.filter((i) => i.severity === "error");
53
+ if (contextErrors.length > 0) {
54
+ return {
55
+ durationMs: Date.now() - start,
56
+ error: `Context files missing. Run fetch-docs first. ${contextErrors.map((e) => e.message).join("; ")}`,
57
+ status: "failed",
58
+ };
59
+ }
60
+ // -----------------------------------------------------------------
61
+ // Compute eval fingerprint (for remote cache + provenance)
62
+ // -----------------------------------------------------------------
63
+ let evalFingerprint;
64
+ if (!debug?.enabled) {
65
+ try {
66
+ evalFingerprint = computeEvalFingerprint({
67
+ filter: ctx.config.areas || ctx.config.tasks
68
+ ? {
69
+ areas: ctx.config.areas,
70
+ taskIds: ctx.config.tasks,
71
+ }
72
+ : undefined,
73
+ graderModel: "default",
74
+ mode: this.mode,
75
+ rootDir,
76
+ });
77
+ // Share fingerprint with downstream steps (PublishReportStep)
78
+ state.evalFingerprint = evalFingerprint;
79
+ }
80
+ catch (err) {
81
+ console.warn(` ⚠️ Could not compute eval fingerprint: ${err instanceof Error ? err.message : String(err)}`);
82
+ }
83
+ }
84
+ // -----------------------------------------------------------------
85
+ // Remote cache check
86
+ // -----------------------------------------------------------------
87
+ if (evalFingerprint &&
88
+ !noCache &&
89
+ !ctx.config.noRemoteCache &&
90
+ ctx.reportStore) {
91
+ const remoteCacheResult = await checkRemoteCache(evalFingerprint, ctx.reportStore, rootDir);
92
+ if (remoteCacheResult) {
93
+ return {
94
+ durationMs: Date.now() - start,
95
+ status: "success",
96
+ summary: `Skipped (remote cache hit) — reusing report ${remoteCacheResult.reportId} from ${remoteCacheResult.completedAt}`,
97
+ };
98
+ }
99
+ }
100
+ // -----------------------------------------------------------------
101
+ // Build subprocess env explicitly (no global mutation)
102
+ // -----------------------------------------------------------------
103
+ const subprocessEnv = {
104
+ PROMPTFOO_DISABLE_SHARE_EMAIL_REQUEST: "1",
105
+ };
106
+ // Only set env vars that differ from defaults — the subprocess inherits
107
+ // process.env via PromptfooEvalAdapter's { ...process.env, ...config.env }
108
+ if (ctx.config.mode !== "baseline") {
109
+ subprocessEnv.EVAL_MODE = ctx.config.mode;
110
+ }
111
+ if (ctx.config.searchMode !== "open") {
112
+ subprocessEnv.EVAL_SEARCH_MODE = ctx.config.searchMode;
113
+ }
114
+ if (ctx.config.allowedOrigins?.length) {
115
+ subprocessEnv.DOC_ALLOWED_ORIGINS = ctx.config.allowedOrigins.join(",");
116
+ }
117
+ // -----------------------------------------------------------------
118
+ // Execute — use the EvalRunner port
119
+ // -----------------------------------------------------------------
120
+ const configFile = CONFIG_FILES[this.mode];
121
+ const filterFlags = buildFilterFlags(debug);
122
+ const result = await ctx.evalRunner.run({
123
+ concurrency,
124
+ configPath: configFile,
125
+ env: subprocessEnv,
126
+ filterFlags: filterFlags.trim() || undefined,
127
+ });
128
+ // Check if results were written despite non-zero exit
129
+ if (result.status === "failed") {
130
+ const resultsExist = checkResultsExist(rootDir, RESULTS_FILES[this.mode]);
131
+ const hasResults = resultsExist.filter((i) => i.severity === "error").length === 0;
132
+ if (!hasResults) {
133
+ return {
134
+ durationMs: Date.now() - start,
135
+ error: result.error ?? `promptfoo eval failed (mode: ${this.mode})`,
136
+ status: "failed",
137
+ };
138
+ }
139
+ }
140
+ // Postcondition: results file exists
141
+ const resultsIssues = checkResultsExist(rootDir, RESULTS_FILES[this.mode]);
142
+ const resultsErrors = resultsIssues.filter((i) => i.severity === "error");
143
+ if (resultsErrors.length > 0) {
144
+ return {
145
+ durationMs: Date.now() - start,
146
+ error: `Postcondition failed: ${resultsErrors.map((e) => e.message).join("; ")}`,
147
+ status: "failed",
148
+ };
149
+ }
150
+ // Scan results for errors
151
+ const errorSummary = scanResultsForErrors(resolve(rootDir, RESULTS_FILES[this.mode]));
152
+ if (errorSummary) {
153
+ console.log();
154
+ console.log(errorSummary);
155
+ }
156
+ const durationMs = Date.now() - start;
157
+ return {
158
+ durationMs,
159
+ status: "success",
160
+ summary: `Evaluation complete (mode: ${this.mode}${debug?.enabled ? ", debug" : ""})`,
161
+ };
162
+ }
163
+ cacheInputs(ctx) {
164
+ return getStepInputPaths(ctx.config.rootDir, `eval-${this.mode}`);
165
+ }
166
+ }
167
+ // ---------------------------------------------------------------------------
168
+ // Remote cache helpers
169
+ // ---------------------------------------------------------------------------
170
+ async function checkRemoteCache(fingerprint, reportStore, rootDir) {
171
+ try {
172
+ const startQuery = Date.now();
173
+ const cachedReport = (await reportStore.findByFingerprint(fingerprint));
174
+ const queryMs = Date.now() - startQuery;
175
+ if (!cachedReport) {
176
+ console.log(` ℹ️ Remote cache miss — no report matches fingerprint (${queryMs}ms)`);
177
+ return null;
178
+ }
179
+ const outDir = resolve(rootDir, "results", "latest");
180
+ if (!existsSync(outDir)) {
181
+ mkdirSync(outDir, { recursive: true });
182
+ }
183
+ writeFileSync(resolve(outDir, "score-summary.json"), JSON.stringify(cachedReport.summary, null, 2));
184
+ console.log(` ✅ Remote cache hit — reusing report ${cachedReport.id} from ${cachedReport.completedAt}`);
185
+ console.log(` ℹ️ Fingerprint: ${fingerprint.slice(0, 16)}... (${queryMs}ms)`);
186
+ return {
187
+ completedAt: cachedReport.completedAt,
188
+ reportId: cachedReport.id,
189
+ };
190
+ }
191
+ catch (err) {
192
+ console.warn(` ⚠️ Remote cache check failed: ${err instanceof Error ? err.message : String(err)}`);
193
+ return null;
194
+ }
195
+ }
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Pipeline step: Validate configuration.
3
+ *
4
+ * Wraps the existing validateConfiguration() + checkEnvironment() logic
5
+ * behind the PipelineStep interface.
6
+ */
7
+ import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
8
/**
 * Pipeline step that validates the configuration and the environment.
 */
export declare class ValidateStep implements PipelineStep {
    /** Step identifier. */
    readonly name: "validate";
    /** Static precondition check. */
    check(): ValidationIssue[];
    /**
     * Runs configuration and environment validation, logging each issue.
     *
     * @param ctx - Application context (config and logger).
     */
    execute(ctx: AppContext): Promise<StepResult>;
}
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Pipeline step: Validate configuration.
3
+ *
4
+ * Wraps the existing validateConfiguration() + checkEnvironment() logic
5
+ * behind the PipelineStep interface.
6
+ */
7
+ import { checkEnvironment } from "../../pipeline/checks.js";
8
+ import { validateConfiguration } from "../../pipeline/validate.js";
9
+ export class ValidateStep {
10
+ name = "validate";
11
+ check() {
12
+ // Validation step has no preconditions — it IS the precondition check.
13
+ return [];
14
+ }
15
+ async execute(ctx) {
16
+ const start = Date.now();
17
+ const validation = validateConfiguration(ctx.config.rootDir);
18
+ const envIssues = checkEnvironment(ctx.config.rootDir);
19
+ validation.issues.push(...envIssues);
20
+ const errors = validation.issues.filter((i) => i.severity === "error");
21
+ const warnings = validation.issues.filter((i) => i.severity === "warning");
22
+ for (const w of warnings) {
23
+ ctx.logger.warn(`[${w.source}] ${w.message}`);
24
+ }
25
+ if (errors.length > 0) {
26
+ for (const e of errors) {
27
+ ctx.logger.error(`[${e.source}] ${e.message}${e.path ? ` at ${e.path}` : ""}`);
28
+ }
29
+ return {
30
+ durationMs: Date.now() - start,
31
+ error: `Configuration invalid: ${errors.length} error(s)`,
32
+ status: "failed",
33
+ };
34
+ }
35
+ return {
36
+ durationMs: Date.now() - start,
37
+ status: "success",
38
+ summary: `Configuration valid (${warnings.length} warning(s))`,
39
+ };
40
+ }
41
+ }