@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530) hide show
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,50 @@
1
+ /**
2
+ * pipeline/callback-delivery.ts
3
+ *
4
+ * Delivers evaluation results to a callback URL with HMAC signature.
5
+ * Used by the API-triggered pipeline flow: after the report is published
6
+ * to the Content Lake (system of record), the result is also POSTed
7
+ * to the caller's callback URL.
8
+ *
9
+ * Fire-and-forget with 3 retries and exponential backoff. If callback
10
+ * fails, log a warning — the result is still in the Content Lake.
11
+ *
12
+ * @see docs/design-docs/api-service-gateway.md
13
+ */
14
+ import type { CallbackPayload } from "../_vendor/ailf-core/index.d.ts";
15
+ export type { CallbackPayload } from "../_vendor/ailf-core/index.d.ts";
16
+ export interface CallbackConfig {
17
+ /** Optional custom HTTP headers (name → value) to include in the callback POST request */
18
+ headers?: Record<string, string>;
19
+ /** The callback URL to POST results to; the computeSignature docs in this file describe it as also being the HMAC signing key */
20
+ url: string;
21
+ }
22
+ /**
23
+ * Generate an HMAC-SHA256 signature for the callback payload.
24
+ *
25
+ * The signature is computed over the raw JSON body using the callback URL
26
+ * as the signing key. This allows the receiver to verify the request
27
+ * originated from AILF without requiring a pre-shared secret.
28
+ *
+ * NOTE(review): the signing key is the callback URL rather than a secret, so
+ * any party that knows the URL can compute a matching signature. Confirm
+ * whether an integrity check (rather than proof of origin) is the intended
+ * guarantee.
+ *
29
+ * The receiver should:
30
+ * 1. Read the raw request body
31
+ * 2. Compute HMAC-SHA256(body, callbackUrl)
32
+ * 3. Compare with the X-AILF-Signature header
+ *
+ * @param body - Raw JSON request body to sign.
+ * @param signingKey - HMAC key; the callback URL according to the description above.
+ * @returns The signature string (the compiled implementation emits a hex digest).
33
+ */
34
+ export declare function computeSignature(body: string, signingKey: string): string;
35
+ /**
36
+ * Deliver evaluation results to a callback URL.
37
+ *
38
+ * POSTs the payload as JSON with:
39
+ * - Content-Type: application/json
40
+ * - X-AILF-Signature: HMAC-SHA256 signature
41
+ * - Any custom headers from the callback config
42
+ *
43
+ * Retries up to 3 times with exponential backoff (1s → 2s → 4s).
44
+ * Returns success/failure status — never throws.
+ *
+ * NOTE(review): three listed delays suggest up to four requests in total
+ * (one initial attempt plus three retries) — confirm against the implementation.
+ *
+ * @param callback - Destination URL plus optional custom headers.
+ * @param payload - Evaluation result that is serialized as the JSON body.
+ * @returns A promise resolving to `{ ok, error?, attempts }`. Presumably `ok`
+ *   reports whether delivery succeeded, `error` describes the last failure and
+ *   `attempts` counts the requests made — TODO confirm against the implementation.
45
+ */
46
+ export declare function deliverCallback(callback: CallbackConfig, payload: CallbackPayload): Promise<{
47
+ ok: boolean;
48
+ error?: string;
49
+ attempts: number;
50
+ }>;
@@ -0,0 +1,89 @@
1
+ /**
2
+ * pipeline/callback-delivery.ts
3
+ *
4
+ * Delivers evaluation results to a callback URL with HMAC signature.
5
+ * Used by the API-triggered pipeline flow: after the report is published
6
+ * to the Content Lake (system of record), the result is also POSTed
7
+ * to the caller's callback URL.
8
+ *
9
+ * Fire-and-forget with 3 retries and exponential backoff. If callback
10
+ * fails, log a warning — the result is still in the Content Lake.
11
+ *
12
+ * @see docs/design-docs/api-service-gateway.md
13
+ */
14
+ import { createHmac } from "crypto";
15
+ // ---------------------------------------------------------------------------
16
+ // Constants
17
+ // ---------------------------------------------------------------------------
18
/** Retries allowed after the first attempt (so at most MAX_RETRIES + 1 requests). */
const MAX_RETRIES = 3;
/** Backoff base in milliseconds; doubled after every failed attempt. */
const BASE_DELAY_MS = 1_000;
/** Per-request timeout in milliseconds before the request is aborted. */
const CALLBACK_TIMEOUT_MS = 10_000;
21
+ // ---------------------------------------------------------------------------
22
+ // HMAC Signature
23
+ // ---------------------------------------------------------------------------
24
/**
 * Compute the hex-encoded HMAC-SHA256 of a callback body.
 *
 * `deliverCallback` signs the serialized JSON body with the callback URL as
 * the key and sends the digest in the `X-AILF-Signature` header. A receiver
 * verifies it by recomputing HMAC-SHA256(rawBody, callbackUrl) and comparing.
 *
 * NOTE(review): the key is the callback URL rather than a pre-shared secret,
 * so the signature is only as confidential as that URL — confirm this matches
 * the intended threat model.
 *
 * @param body - Exact string being sent as the request body.
 * @param signingKey - HMAC key.
 * @returns Hexadecimal digest string.
 */
export function computeSignature(body, signingKey) {
    const hmac = createHmac("sha256", signingKey);
    hmac.update(body);
    return hmac.digest("hex");
}
39
+ // ---------------------------------------------------------------------------
40
+ // Delivery
41
+ // ---------------------------------------------------------------------------
42
/**
 * POST evaluation results to a callback URL.
 *
 * The payload is serialized once and sent as JSON with:
 *   - Content-Type: application/json
 *   - X-AILF-Signature: HMAC-SHA256 of the body, keyed by the callback URL
 *   - any custom headers from `callback.headers` (spread last, so a custom
 *     header with the same name replaces the default above)
 *
 * Makes up to MAX_RETRIES + 1 attempts (currently 4), waiting
 * BASE_DELAY_MS * 2^attempt between failed attempts (1s → 2s → 4s). Each
 * request is aborted after CALLBACK_TIMEOUT_MS. A non-OK HTTP status counts
 * as a failure and is retried like a network error.
 *
 * Never throws: serialization, signing, network and HTTP failures are all
 * reported through the resolved value.
 *
 * @param callback - Destination URL and optional extra headers.
 * @param payload - Result object to serialize and send.
 * @returns `{ ok, attempts }`, plus `error` describing the last failure when
 *          `ok` is false. `attempts` is 0 when the payload could not be
 *          serialized or signed, because no request was sent.
 */
export async function deliverCallback(callback, payload) {
    let body;
    let signature;
    try {
        body = JSON.stringify(payload);
        signature = computeSignature(body, callback.url);
    }
    catch (err) {
        // Unserializable payloads (cycles, BigInt, undefined) or an unusable
        // URL must not escape as a rejection — honour the never-throws contract.
        return {
            ok: false,
            error: err instanceof Error ? err.message : String(err),
            attempts: 0,
        };
    }
    let lastError;
    let attempts = 0;
    for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
        attempts = attempt + 1;
        const controller = new AbortController();
        const timeout = setTimeout(() => controller.abort(), CALLBACK_TIMEOUT_MS);
        try {
            const response = await fetch(callback.url, {
                method: "POST",
                headers: {
                    "Content-Type": "application/json",
                    "X-AILF-Signature": signature,
                    ...(callback.headers ?? {}),
                },
                body,
                signal: controller.signal,
            });
            if (response.ok) {
                return { ok: true, attempts };
            }
            lastError = `HTTP ${response.status}: ${response.statusText}`;
        }
        catch (err) {
            lastError = err instanceof Error ? err.message : String(err);
        }
        finally {
            // Always disarm the abort timer, including when fetch rejects;
            // otherwise it stays pending after the attempt has already failed.
            clearTimeout(timeout);
        }
        // Don't delay after the last attempt
        if (attempt < MAX_RETRIES) {
            const delayMs = BASE_DELAY_MS * Math.pow(2, attempt);
            await new Promise((resolve) => setTimeout(resolve, delayMs));
        }
    }
    return { ok: false, error: lastError, attempts };
}
@@ -0,0 +1,39 @@
1
+ /**
2
+ * pipeline/checks.ts
3
+ *
4
+ * Pre/postcondition checking for pipeline steps.
5
+ * Each function verifies filesystem or environment state and returns
6
+ * ValidationIssue[] — an empty array means all checks passed.
7
+ */
8
+ import type { ValidationIssue } from "./types.js";
9
/**
 * Verify that `contexts/canonical/<taskId>.md` exists under `rootDir` and is
 * non-empty for each given task ID.
 *
 * @param rootDir - Directory that contains the `contexts/` folder.
 * @param taskIds - Task IDs to look up.
 * @returns One error issue per missing or empty file; empty when all pass.
 */
export declare function checkCanonicalContextsExist(rootDir: string, taskIds: string[]): ValidationIssue[];
14
/**
 * Verify that `contexts/<area>.md` exists under `rootDir` and is non-empty
 * for each given area. The eval step cannot run without these docs, so this
 * is its precondition.
 *
 * @param rootDir - Directory that contains the `contexts/` folder.
 * @param areas - Feature-area names to look up.
 * @returns One error issue per missing or empty file; empty when all pass.
 */
export declare function checkContextsExist(rootDir: string, areas: string[]): ValidationIssue[];
19
/**
 * Verify that the required environment variables are set.
 *
 * The root `.env` file (two directories above `rootDir`) is loaded first with
 * override enabled, matching the dotenv CLI `-o` flag used by other scripts;
 * `OPENAI_API_KEY` and `SANITY_API_TOKEN` are then checked.
 *
 * @param rootDir - Package directory used to locate the root `.env`.
 * @returns One warning issue per unset variable; empty when all are set.
 */
export declare function checkEnvironment(rootDir: string): ValidationIssue[];
25
/**
 * Verify that the generated promptfoo configs exist in `rootDir`.
 *
 * A missing baseline `promptfooconfig.yaml` is an error. A missing
 * `promptfooconfig.observed.yaml` or `promptfooconfig.agentic.yaml` is only
 * a warning, since those configs are optional.
 *
 * @param rootDir - Directory expected to contain the config files.
 * @returns Issues for each missing config; empty when all are present.
 */
export declare function checkGeneratedConfigsExist(rootDir: string): ValidationIssue[];
30
/**
 * Verify that the eval results file exists, parses as JSON, and holds a
 * `results` array (either directly or nested as `results.results`).
 *
 * @param rootDir - Base directory for resolving the results file.
 * @param resultsPath - Optional path to the results file, resolved against
 *                      `rootDir`; defaults to `results/latest/eval-results.json`.
 * @returns A single error issue for the first failed check; empty on success.
 */
export declare function checkResultsExist(rootDir: string, resultsPath?: string): ValidationIssue[];
35
/**
 * Verify that `results/latest/score-summary.json` exists, parses as a JSON
 * object with at least one key, and that none of its numeric top-level
 * values are NaN (error) or outside 0–100 (warning).
 *
 * @param rootDir - Directory that contains the `results/` folder.
 * @returns Issues found; empty when the summary passes every check.
 */
export declare function checkScoreSummaryValid(rootDir: string): ValidationIssue[];
@@ -0,0 +1,280 @@
1
+ /**
2
+ * pipeline/checks.ts
3
+ *
4
+ * Pre/postcondition checking for pipeline steps.
5
+ * Each function verifies filesystem or environment state and returns
6
+ * ValidationIssue[] — an empty array means all checks passed.
7
+ */
8
+ import { config as loadEnv } from "dotenv";
9
+ import { existsSync, readFileSync, statSync } from "fs";
10
+ import { join, resolve } from "path";
11
+ // ---------------------------------------------------------------------------
12
+ // Precondition: canonical context files exist per task
13
+ // ---------------------------------------------------------------------------
14
/**
 * Verify that `contexts/canonical/<taskId>.md` exists under `rootDir` and is
 * non-empty for each given task ID.
 *
 * @param rootDir - Directory that contains the `contexts/` folder.
 * @param taskIds - Task IDs to look up.
 * @returns One error issue per missing or empty file, in input order.
 */
export function checkCanonicalContextsExist(rootDir, taskIds) {
    const source = "checkCanonicalContextsExist";
    const baseDir = resolve(rootDir, "contexts", "canonical");
    return taskIds.flatMap((taskId) => {
        const path = join(baseDir, `${taskId}.md`);
        if (!existsSync(path)) {
            return [
                {
                    message: `Missing canonical context for task "${taskId}". Run \`pnpm fetch-docs\` to generate it.`,
                    path,
                    severity: "error",
                    source,
                },
            ];
        }
        // A zero-byte file is as useless to the pipeline as a missing one.
        if (statSync(path).size === 0) {
            return [
                {
                    message: `Canonical context for task "${taskId}" is empty. Re-run \`pnpm fetch-docs\`.`,
                    path,
                    severity: "error",
                    source,
                },
            ];
        }
        return [];
    });
}
44
+ // ---------------------------------------------------------------------------
45
+ // Precondition: contexts exist for each feature area
46
+ // ---------------------------------------------------------------------------
47
/**
 * Verify that `contexts/<area>.md` exists under `rootDir` and is non-empty
 * for each given area. The eval step cannot run without these docs, so this
 * is its precondition.
 *
 * @param rootDir - Directory that contains the `contexts/` folder.
 * @param areas - Feature-area names to look up.
 * @returns One error issue per missing or empty file, in input order.
 */
export function checkContextsExist(rootDir, areas) {
    const source = "checkContextsExist";
    const baseDir = resolve(rootDir, "contexts");
    return areas.flatMap((area) => {
        const path = join(baseDir, `${area}.md`);
        if (!existsSync(path)) {
            return [
                {
                    message: `Missing context file for area "${area}". Run \`pnpm fetch-docs\` to generate it.`,
                    path,
                    severity: "error",
                    source,
                },
            ];
        }
        // A zero-byte file is as useless to the eval step as a missing one.
        if (statSync(path).size === 0) {
            return [
                {
                    message: `Context file for area "${area}" is empty. Re-run \`pnpm fetch-docs\`.`,
                    path,
                    severity: "error",
                    source,
                },
            ];
        }
        return [];
    });
}
77
+ // ---------------------------------------------------------------------------
78
+ // Environment variable checks
79
+ // ---------------------------------------------------------------------------
80
/**
 * Verify that the required environment variables are set.
 *
 * If a `.env` file exists two directories above `rootDir`, it is loaded first
 * with override enabled (mirroring `dotenv -e ../../.env -o` used by other
 * scripts), which mutates `process.env`. `OPENAI_API_KEY` and
 * `SANITY_API_TOKEN` are then checked.
 *
 * @param rootDir - Package directory used to locate the root `.env`.
 * @returns One warning issue per unset (or empty) variable.
 */
export function checkEnvironment(rootDir) {
    const dotenvFile = resolve(rootDir, "..", "..", ".env");
    if (existsSync(dotenvFile)) {
        loadEnv({ override: true, path: dotenvFile });
    }
    // [variable name, warning emitted when it is unset], checked in this order.
    const required = [
        [
            "OPENAI_API_KEY",
            "OPENAI_API_KEY is not set. Ensure it is defined in the root .env file.",
        ],
        [
            "SANITY_API_TOKEN",
            "SANITY_API_TOKEN is not set. It is needed for 'turbo fetch-docs'. Ensure it is defined in the root .env file.",
        ],
    ];
    return required
        .filter(([name]) => !process.env[name])
        .map(([, message]) => ({
            message,
            severity: "warning",
            source: "checkEnvironment",
        }));
}
108
+ // ---------------------------------------------------------------------------
109
+ // Precondition: generated promptfoo configs exist
110
+ // ---------------------------------------------------------------------------
111
/**
 * Verify that the generated promptfoo configs exist in `rootDir`.
 *
 * A missing baseline `promptfooconfig.yaml` is an error. A missing
 * `promptfooconfig.observed.yaml` or `promptfooconfig.agentic.yaml` is only
 * a warning, since those configs are optional.
 *
 * @param rootDir - Directory expected to contain the config files.
 * @returns Baseline issue first (if any), then optional-config warnings.
 */
export function checkGeneratedConfigsExist(rootDir) {
    const source = "checkGeneratedConfigsExist";
    const baseline = resolve(rootDir, "promptfooconfig.yaml");
    const baselineIssues = existsSync(baseline)
        ? []
        : [
            {
                message: "Baseline config 'promptfooconfig.yaml' not found. Run 'pnpm generate-configs'.",
                path: baseline,
                severity: "error",
                source,
            },
        ];
    const optionalIssues = ["promptfooconfig.observed.yaml", "promptfooconfig.agentic.yaml"]
        .map((name) => ({ name, path: resolve(rootDir, name) }))
        .filter((candidate) => !existsSync(candidate.path))
        .map((candidate) => ({
            message: `Optional config \`${candidate.name}\` not found. Run \`pnpm generate-configs\` to create it.`,
            path: candidate.path,
            severity: "warning",
            source,
        }));
    return [...baselineIssues, ...optionalIssues];
}
143
+ // ---------------------------------------------------------------------------
144
+ // Postcondition: eval results exist and are valid JSON
145
+ // ---------------------------------------------------------------------------
146
/**
 * Verify that the eval results file exists, parses as JSON, and holds a
 * `results` array.
 *
 * Two shapes are accepted: `{ results: [...] }` and the promptfoo wrapper
 * `{ results: { results: [...], stats: {...} } }`.
 *
 * @param rootDir - Base directory for resolving the results file.
 * @param resultsPath - Optional path to the results file, resolved against
 *                      `rootDir`; defaults to `results/latest/eval-results.json`.
 * @returns A single error issue for the first failed check; empty on success.
 */
export function checkResultsExist(rootDir, resultsPath) {
    const target = resolve(rootDir, resultsPath ?? join("results", "latest", "eval-results.json"));
    // Every failure in this check is a one-element error list for the same file.
    const failure = (message) => [
        { message, path: target, severity: "error", source: "checkResultsExist" },
    ];
    if (!existsSync(target)) {
        return failure("Eval results file not found. Run 'pnpm eval' first.");
    }
    let data;
    try {
        data = JSON.parse(readFileSync(target, "utf-8"));
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return failure(`Eval results file is not valid JSON: ${reason}`);
    }
    const isObject = data !== null && typeof data === "object";
    if (!isObject || !("results" in data)) {
        return failure('Eval results file is missing the "results" key.');
    }
    const outer = data.results;
    // Look one level deeper for the promptfoo wrapper shape.
    const nested = outer !== null && typeof outer === "object" && "results" in outer
        ? outer.results
        : undefined;
    if (Array.isArray(outer) || Array.isArray(nested)) {
        return [];
    }
    return failure('The "results" key in eval results is not an array (or results.results).');
}
205
+ // ---------------------------------------------------------------------------
206
+ // Postcondition: score summary is valid
207
+ // ---------------------------------------------------------------------------
208
/**
 * Verify that `results/latest/score-summary.json` exists, parses as a JSON
 * object with at least one key, and that its numeric top-level values are
 * sane.
 *
 * Only top-level values of type `number` are inspected: NaN is reported as
 * an error and a value outside 0–100 as a warning. Non-numeric values are
 * skipped.
 *
 * @param rootDir - Directory that contains the `results/` folder.
 * @returns Issues found; empty when the summary passes every check.
 */
export function checkScoreSummaryValid(rootDir) {
    const summaryPath = resolve(rootDir, "results", "latest", "score-summary.json");
    const issue = (severity, message) => ({
        message,
        path: summaryPath,
        severity,
        source: "checkScoreSummaryValid",
    });
    if (!existsSync(summaryPath)) {
        return [issue("error", "Score summary not found. Run 'pnpm report' to generate it.")];
    }
    let summary;
    try {
        summary = JSON.parse(readFileSync(summaryPath, "utf-8"));
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return [issue("error", `Score summary is not valid JSON: ${reason}`)];
    }
    if (summary === null || typeof summary !== "object") {
        return [issue("error", "Score summary is not a JSON object.")];
    }
    const entries = Object.entries(summary);
    if (entries.length === 0) {
        return [issue("error", "Score summary contains no feature area scores.")];
    }
    const found = [];
    for (const [area, score] of entries) {
        if (typeof score !== "number") {
            continue;
        }
        if (Number.isNaN(score)) {
            found.push(issue("error", `Score for "${area}" is NaN.`));
        }
        else if (score < 0 || score > 100) {
            found.push(issue("warning", `Score for "${area}" is ${score}, which is outside the 0–100 range.`));
        }
    }
    return found;
}
@@ -0,0 +1,61 @@
1
+ /**
2
+ * classify-url.ts
3
+ *
4
+ * Classifies URLs passed via --url/--urls into specific source types.
5
+ * Enables intelligent inference: a Sanity Studio release URL automatically
6
+ * sets the perspective, a Studio document URL extracts the document ID, etc.
7
+ *
8
+ * Classification rules (applied in order):
9
+ * 1. /releases/<id> → Sanity Release (extract perspective)
10
+ * 2. /structure/...;<uuid> → Sanity Studio Document (extract doc ID + optional perspective)
11
+ * 3. everything else → Direct URL (fetch as documentation)
12
+ */
13
/**
 * Outcome of classifying one URL. Discriminated by `type`, so narrowing on
 * it selects the matching member interface.
 */
export type ClassifiedUrl = DirectUrl | SanityDocumentUrl | SanityReleaseUrl;
/** Any URL that is not a recognized Sanity Studio URL; fetched as documentation. */
export interface DirectUrl {
    type: "direct-url";
    /** The URL to fetch. */
    url: string;
}
/** A Sanity Studio document URL. */
export interface SanityDocumentUrl {
    /** Document ID taken from the URL. */
    documentId: string;
    /** Perspective ID, when the URL carries one. */
    perspectiveId?: string;
    /** Origin of the Studio the URL points at. */
    studioOrigin: string;
    type: "sanity-document";
}
/** A Sanity Studio release URL. */
export interface SanityReleaseUrl {
    /** Perspective ID taken from the URL. */
    perspectiveId: string;
    /** Origin of the Studio the URL points at. */
    studioOrigin: string;
    type: "sanity-release";
}
33
/** Aggregate produced by classifying a list of URLs. */
export interface ClassificationResult {
    /** URLs that were classified as direct documentation pages. */
    directUrls: string[];
    /** Document IDs pulled out of Sanity Studio document URLs. */
    documentIds: string[];
    /** Perspective ID taken from a release or document URL; the first one found wins. */
    inferredPerspective?: string;
    /** Studio origin taken from a Sanity URL; the first one found wins. */
    inferredStudioOrigin?: string;
}
44
/**
 * Determine which known source type a single URL belongs to.
 *
 * @param input - URL string to classify.
 * @returns The matching `ClassifiedUrl` variant with any extracted metadata.
 * @throws If `input` is not a valid URL.
 */
export declare function classifyUrl(input: string): ClassifiedUrl;
52
/**
 * Classify a list of URLs and merge the outcomes into one result.
 *
 * Direct URLs and document IDs are gathered into separate lists. Perspective
 * and studio origin come from the first Sanity URL that supplies them.
 *
 * @param urls - URL strings to classify.
 * @returns The aggregated classification.
 */
export declare function classifyUrls(urls: string[]): ClassificationResult;
@@ -0,0 +1,93 @@
1
+ /**
2
+ * classify-url.ts
3
+ *
4
+ * Classifies URLs passed via --url/--urls into specific source types.
5
+ * Enables intelligent inference: a Sanity Studio release URL automatically
6
+ * sets the perspective, a Studio document URL extracts the document ID, etc.
7
+ *
8
+ * Classification rules (applied in order):
9
+ * 1. /releases/<id> → Sanity Release (extract perspective)
10
+ * 2. /structure/...;<uuid> → Sanity Studio Document (extract doc ID + optional perspective)
11
+ * 3. everything else → Direct URL (fetch as documentation)
12
+ */
13
+ // ---------------------------------------------------------------------------
14
+ // Patterns
15
+ // ---------------------------------------------------------------------------
16
/**
 * UUID-shaped token: 8-4-4-4-12 hex digits, case-insensitive. The pattern is
 * unanchored and does not inspect the version digit, so `.test()` succeeds
 * on any string that contains such a token.
 */
const UUID_PATTERN = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i;
/**
 * Matches a pathname that ends in `/releases/<perspective-id>` and captures
 * the ID (word characters and hyphens). A trailing slash does not match.
 */
const RELEASE_PATH = /\/releases\/([\w-]+)$/;
/**
 * Matches `/structure/...;<segment>` and captures the text after the last
 * semicolon (up to any `?`). Whether that segment holds a UUID is checked
 * separately by the caller.
 */
const STRUCTURE_PATH = /\/structure\/.+;([^;?]+)/;
25
+ // ---------------------------------------------------------------------------
26
+ // Classification
27
+ // ---------------------------------------------------------------------------
28
/**
 * Determine which known source type a single URL belongs to.
 *
 * Rules are tried in order:
 *   1. pathname matches RELEASE_PATH → "sanity-release"
 *   2. pathname matches STRUCTURE_PATH and the captured segment contains a
 *      UUID → "sanity-document" (the whole captured segment becomes the
 *      document ID; `?perspective=` supplies the optional perspective)
 *   3. anything else → "direct-url" carrying the original input string
 *
 * @param input - URL string to classify.
 * @returns The matching classified-URL object with any extracted metadata.
 * @throws If `input` is not a valid URL (from the `URL` constructor).
 */
export function classifyUrl(input) {
    const { origin, pathname, searchParams } = new URL(input);
    // Rule 1: Studio release URL.
    const release = RELEASE_PATH.exec(pathname);
    if (release !== null) {
        return {
            perspectiveId: release[1],
            studioOrigin: origin,
            type: "sanity-release",
        };
    }
    // Rule 2: Studio document URL whose last `;` segment holds a UUID.
    const lastSegment = STRUCTURE_PATH.exec(pathname)?.[1];
    if (lastSegment !== undefined && UUID_PATTERN.test(lastSegment)) {
        return {
            documentId: lastSegment,
            perspectiveId: searchParams.get("perspective") ?? undefined,
            studioOrigin: origin,
            type: "sanity-document",
        };
    }
    // Rule 3: fall back to treating the input as a documentation page.
    return { type: "direct-url", url: input };
}
59
/**
 * Classify a list of URLs and merge the outcomes into one result.
 *
 * Direct URLs and document IDs are gathered into separate lists in input
 * order. The inferred perspective and studio origin are set by the first
 * Sanity URL that supplies them and are never overwritten afterwards.
 *
 * @param urls - URL strings to classify.
 * @returns The aggregated classification.
 * @throws If any entry is not a valid URL (propagated from `classifyUrl`).
 */
export function classifyUrls(urls) {
    const summary = {
        directUrls: [],
        documentIds: [],
    };
    for (const url of urls) {
        const item = classifyUrl(url);
        if (item.type === "direct-url") {
            summary.directUrls.push(item.url);
        }
        else if (item.type === "sanity-document") {
            summary.documentIds.push(item.documentId);
            // Document URLs only carry a perspective when one was in the query string.
            if (!summary.inferredPerspective && item.perspectiveId) {
                summary.inferredPerspective = item.perspectiveId;
            }
            summary.inferredStudioOrigin ??= item.studioOrigin;
        }
        else if (item.type === "sanity-release") {
            summary.inferredPerspective ??= item.perspectiveId;
            summary.inferredStudioOrigin ??= item.studioOrigin;
        }
    }
    return summary;
}
@@ -0,0 +1,31 @@
1
+ /**
2
+ * pipeline/compare.ts
3
+ *
4
+ * Core comparison primitive for the evaluation framework.
5
+ *
6
+ * Takes two ScoreSummary objects (baseline and experiment) and produces a
7
+ * structured ComparisonReport with overall, per-area, and per-dimension
8
+ * deltas, plus improved/regressed/unchanged classification.
9
+ *
10
+ * This is the single function that backs all comparison scenarios:
11
+ * doc improvement, model comparison, branch validation, etc.
12
+ * What varies is what produced each ScoreSummary — the comparison
13
+ * logic is always the same.
14
+ *
15
+ * @see docs/ideas/evaluation-roadmap.md — BP5: Make comparison a primitive
16
+ * @see docs/ideas/metrics-design.md — Tier 4: Comparison results
17
+ */
18
+ import { type ChangeClass, type CompareOptions, type ComparisonReport, type ScoreSummary } from "./types.js";
19
/**
 * Label a score delta as improved, regressed, or unchanged relative to a
 * threshold.
 *
 * @param delta - Score difference to classify.
 * @param threshold - Threshold used to decide between the three classes.
 * @returns The resulting change class.
 */
export declare function classifyChange(delta: number, threshold: number): ChangeClass;
21
/**
 * Produce a structured comparison of two evaluation score summaries.
 *
 * Pure function: it has no side effects and performs no file I/O.
 *
 * @param baseline   The "before" / "control" score summary.
 * @param experiment The "after" / "treatment" score summary.
 * @param options    Optional configuration (noise threshold, etc.).
 * @returns A ComparisonReport with deltas, classifications, and breakdowns.
 */
export declare function compare(baseline: ScoreSummary, experiment: ScoreSummary, options?: CompareOptions): ComparisonReport;