@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530) hide show
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,101 @@
1
+ /**
2
+ * pipeline/cache.ts
3
+ *
4
+ * Content-aware caching for pipeline steps. Each step's cache key is derived
5
+ * from a hash of its inputs (config files, context files, task files, etc.).
6
+ * When inputs haven't changed between runs, cached outputs are reused.
7
+ *
8
+ * Cache storage: `results/cache/` directory with one JSON manifest per step.
9
+ * Each manifest stores the step's most recent cache entry: the input hash plus output metadata.
10
+ *
11
+ * Cache invalidation triggers:
12
+ * - Content change: any input file's content changes → hash changes → miss
13
+ * - Config change: config/models.yaml, config/sources.yaml, tasks/*.yaml changes → miss
14
+ * - Manual bypass: --no-cache flag skips all cache lookups
15
+ * - Cache clear: delete results/cache/ to start fresh
16
+ */
17
+ /** A single cache entry — each step's manifest keeps only the most recent one */
18
+ export interface CacheEntry {
19
+ /** Duration of the original execution in milliseconds */
20
+ durationMs: number;
21
+ /** SHA-256 hash of all input content */
22
+ inputHash: string;
23
+ /** Output file paths that were produced (relative to package root) */
24
+ outputPaths: string[];
25
+ /** Human-readable summary of what was cached */
26
+ summary: string;
27
+ /** When this entry was created */
28
+ timestamp: string;
29
+ }
30
+ /** Result of a cache lookup */
31
+ export type CacheLookupResult = {
32
+ hit: false;
33
+ currentHash: string;
34
+ } | {
35
+ hit: true;
36
+ entry: CacheEntry;
37
+ };
38
+ /** The cache manifest for a single pipeline step */
39
+ export interface CacheManifest {
40
+ /** The most recent cache entry */
41
+ entry: CacheEntry | null;
42
+ /** Step name (e.g., "fetch-docs", "generate-configs") */
43
+ step: string;
44
+ }
45
+ /** Stats collected across all pipeline steps for reporting */
46
+ export interface CacheStats {
47
+ /** Steps where cache was hit (skipped execution) */
48
+ hits: number;
49
+ /** Steps where cache was missed (executed normally) */
50
+ misses: number;
51
+ /** Steps that were skipped for other reasons (--skip-fetch, etc.) */
52
+ skipped: number;
53
+ /** Per-step detail */
54
+ steps: Record<string, "disabled" | "hit" | "miss" | "skipped">;
55
+ /** Total steps that participated in caching */
56
+ total: number;
57
+ }
58
+ /** Create an empty CacheStats object */
59
+ export declare function createCacheStats(): CacheStats;
60
+ /** Format cache stats as a human-readable summary line */
61
+ export declare function formatCacheStats(stats: CacheStats): string;
62
+ /**
63
+ * Collect all file paths that serve as inputs for a given pipeline step.
64
+ * This is the core of cache key computation — if any of these files change,
65
+ * the step must re-execute.
66
+ */
67
+ export declare function getStepInputPaths(rootDir: string, step: string): string[];
68
+ /**
69
+ * Compute a SHA-256 hash of the concatenated content of multiple files.
70
+ * Files are sorted by path for deterministic ordering. Missing files
71
+ * contribute a sentinel value so the hash changes if a file is deleted.
72
+ *
73
+ * Optional `context` strings are included in the hash so that non-file
74
+ * state (e.g., filter flags, environment variables) can also participate
75
+ * in cache key computation.
76
+ */
77
+ export declare function hashFiles(paths: string[], context?: string[]): string;
78
+ /**
79
+ * Look up the cache for a pipeline step.
80
+ *
81
+ * Computes the current input hash and compares it against the stored manifest.
82
+ * Returns a hit if the hashes match AND all expected output files still exist.
83
+ *
84
+ * Optional `context` strings are included in the hash so that non-file
85
+ * state (e.g., area/task filter flags) participates in cache key computation.
86
+ */
87
+ export declare function lookupCache(rootDir: string, step: string, context?: string[]): CacheLookupResult;
88
+ /**
89
+ * Read the cache manifest for a step.
90
+ * Returns null if no manifest exists or it's corrupt.
91
+ */
92
+ export declare function readManifest(rootDir: string, step: string): CacheManifest | null;
93
+ /**
94
+ * Record a cache entry after a successful step execution.
95
+ */
96
+ export declare function recordCache(rootDir: string, step: string, inputHash: string, summary: string, durationMs: number, outputPaths: string[]): void;
97
+ /**
98
+ * Write a cache manifest for a step.
99
+ * Creates the cache directory if it doesn't exist.
100
+ */
101
+ export declare function writeManifest(rootDir: string, step: string, entry: CacheEntry): void;
@@ -0,0 +1,283 @@
1
+ /**
2
+ * pipeline/cache.ts
3
+ *
4
+ * Content-aware caching for pipeline steps. Each step's cache key is derived
5
+ * from a hash of its inputs (config files, context files, task files, etc.).
6
+ * When inputs haven't changed between runs, cached outputs are reused.
7
+ *
8
+ * Cache storage: `results/cache/` directory with one JSON manifest per step.
9
+ * Each manifest stores the step's most recent cache entry: the input hash plus output metadata.
10
+ *
11
+ * Cache invalidation triggers:
12
+ * - Content change: any input file's content changes → hash changes → miss
13
+ * - Config change: config/models.yaml, config/sources.yaml, tasks/*.yaml changes → miss
14
+ * - Manual bypass: --no-cache flag skips all cache lookups
15
+ * - Cache clear: delete results/cache/ to start fresh
16
+ */
17
+ import { createHash } from "crypto";
18
+ import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync, } from "fs";
19
+ import { join, resolve } from "path";
20
+ // ---------------------------------------------------------------------------
21
+ // Constants
22
+ // ---------------------------------------------------------------------------
23
+ const CACHE_DIR_NAME = "cache";
24
+ const CACHE_VERSION = 1;
25
+ // ---------------------------------------------------------------------------
26
+ // Cache stats
27
+ // ---------------------------------------------------------------------------
28
/**
 * Build a fresh CacheStats accumulator: every counter starts at zero and the
 * per-step status map starts empty. Each call returns a new object.
 */
export function createCacheStats() {
    const steps = {};
    return { hits: 0, misses: 0, skipped: 0, steps, total: 0 };
}
38
/**
 * Format cache stats as a human-readable summary line.
 *
 * Produces a comma-separated breakdown of the non-zero counters followed by
 * the total in parentheses, e.g. `2 cached, 1 evaluated (3 total steps)`.
 * Misses are reported as "evaluated" because a missed step runs normally.
 * When every counter is zero only the parenthesised total is returned, with
 * no leading space.
 *
 * @param stats - Aggregated cache statistics for the run.
 * @returns The formatted summary line.
 */
export function formatCacheStats(stats) {
    const { hits, misses, skipped, total } = stats;
    const breakdown = [
        [hits, "cached"],
        [misses, "evaluated"],
        [skipped, "skipped"],
    ]
        .filter(([count]) => count > 0)
        .map(([count, label]) => `${count} ${label}`)
        .join(", ");
    const totalPart = `(${total} total steps)`;
    // Avoid emitting " (N total steps)" with a stray leading space when
    // there is nothing to break down.
    return breakdown ? `${breakdown} ${totalPart}` : totalPart;
}
51
+ // ---------------------------------------------------------------------------
52
+ // Input collection, hashing, and lookup
53
+ // ---------------------------------------------------------------------------
54
/** True for file names ending in ".md". */
function isMarkdownName(name) {
    return name.endsWith(".md");
}
/** True for file names ending in ".yaml" or ".yml". */
function isYamlName(name) {
    return name.endsWith(".yaml") || name.endsWith(".yml");
}
/**
 * List the entries directly inside `dir` whose names satisfy `accept`,
 * returned as paths joined onto `dir`. Returns an empty list when `dir`
 * does not exist.
 */
function listDirEntries(dir, accept) {
    if (!existsSync(dir)) {
        return [];
    }
    return readdirSync(dir)
        .filter((name) => accept(name))
        .map((name) => join(dir, name));
}
/**
 * Collect every regular file under `dir`, descending into subdirectories.
 * Symlinks that point at files are included; symlinked directories are not
 * followed, so a link cycle cannot cause unbounded recursion. Returns an
 * empty list when `dir` does not exist.
 */
function collectFilesRecursive(dir) {
    if (!existsSync(dir)) {
        return [];
    }
    const found = [];
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
        const fullPath = join(dir, entry.name);
        if (entry.isDirectory()) {
            found.push(...collectFilesRecursive(fullPath));
        }
        else if (entry.isFile() ||
            (entry.isSymbolicLink() && statSync(fullPath).isFile())) {
            found.push(fullPath);
        }
    }
    return found;
}
/**
 * Collect all file paths that serve as inputs for a given pipeline step.
 * This is the core of cache key computation — if any of these files change,
 * the step must re-execute.
 *
 * Paths are resolved against `rootDir`. Fixed config paths are returned even
 * when the file is absent (except for "calculate-scores", which keeps only
 * existing result files); directory listings contribute whatever is on disk.
 *
 * @param rootDir - Package root that all relative input paths resolve against.
 * @param step - Pipeline step name; unknown steps have no inputs.
 * @returns Absolute paths of the step's input files (unsorted).
 */
export function getStepInputPaths(rootDir, step) {
    const r = (rel) => resolve(rootDir, rel);
    switch (step) {
        case "calculate-scores": {
            // Inputs: whichever eval result files currently exist.
            // NOTE(review): the scoring code itself is not part of the key, so
            // a change to scoring logic alone does not invalidate this entry.
            return [
                r("results/latest/eval-results.json"),
                r("results/latest/eval-results-agentic.json"),
                r("results/latest/eval-results-observed.json"),
            ].filter((p) => existsSync(p));
        }
        case "eval":
        case "eval-baseline":
        case "eval-agentic":
        case "eval-observed": {
            // Per-mode cache keys: each mode includes only its own config and
            // expanded YAML so changes to one mode's inputs don't invalidate
            // another's cache entry. The legacy "eval" key includes all of
            // them for backward compatibility.
            const allModes = step === "eval";
            const paths = [r("config/models.yaml")];
            if (allModes || step === "eval-baseline") {
                paths.push(r("promptfooconfig.yaml"), r("tasks/.expanded.yaml"));
            }
            if (allModes || step === "eval-agentic") {
                paths.push(r("promptfooconfig.agentic.yaml"), r("tasks/.expanded.agentic.yaml"));
            }
            if (allModes || step === "eval-observed") {
                paths.push(r("promptfooconfig.observed.yaml"));
            }
            // Context files (shared across modes), then canonical contexts.
            const contextsDir = r("contexts");
            paths.push(...listDirEntries(contextsDir, isMarkdownName), ...listDirEntries(join(contextsDir, "canonical"), isMarkdownName));
            // Task files (assertions and test definitions). Generated
            // .expanded*.yaml files are excluded here because the relevant
            // ones are already listed explicitly above per mode.
            paths.push(...listDirEntries(r("tasks"), (name) => isYamlName(name) && !name.startsWith(".expanded")));
            // Reference solutions (used by grader assertions). Walked
            // recursively: solutions may live in per-area subdirectories
            // (e.g. `groq/`, `frameworks/`), which a flat listing would miss.
            paths.push(...collectFilesRecursive(r("canonical/reference-solutions")));
            return paths;
        }
        case "fetch-docs": {
            // Inputs: source + model config and every task file (including
            // any .expanded*.yaml present in tasks/).
            return [
                r("config/sources.yaml"),
                r("config/models.yaml"),
                ...listDirEntries(r("tasks"), isYamlName),
            ];
        }
        case "generate-configs": {
            // Inputs: model + source config and every task file.
            // NOTE(review): this listing also picks up tasks/.expanded*.yaml;
            // if this step is what writes those files, confirm that hashing
            // its own output is intended.
            return [
                r("config/models.yaml"),
                r("config/sources.yaml"),
                ...listDirEntries(r("tasks"), isYamlName),
            ];
        }
        case "report": {
            // Inputs: score summary
            return [r("results/latest/score-summary.json")];
        }
        default:
            return [];
    }
}
165
/**
 * Compute a SHA-256 hash of the concatenated content of multiple files.
 * Files are sorted by path for deterministic ordering. Missing files
 * contribute a sentinel value so the hash changes if a file is deleted.
 *
 * Optional `context` strings are included in the hash so that non-file
 * state (e.g., filter flags, environment variables) can also participate
 * in cache key computation.
 *
 * @param paths - File paths to hash; the array itself is not mutated.
 * @param context - Extra strings mixed into the hash, in the given order.
 * @returns Hex-encoded SHA-256 digest.
 * @throws Re-throws the read error when a path exists but cannot be read
 *   (for example, when it is a directory).
 */
export function hashFiles(paths, context) {
    const hash = createHash("sha256");
    // Include cache version so format changes invalidate everything
    hash.update(`cache-version:${CACHE_VERSION}\n`);
    // Include non-file context (filter flags, env vars, etc.)
    for (const c of context ?? []) {
        hash.update(`context:${c}\n`);
    }
    for (const filePath of [...paths].sort()) {
        hash.update(`path:${filePath}\n`);
        let content;
        try {
            content = readFileSync(filePath);
        }
        catch (err) {
            // Read first, check afterwards: a file that disappears while we
            // are hashing is treated as missing instead of crashing the
            // lookup. Paths that still exist but cannot be read keep
            // surfacing the original error.
            if (existsSync(filePath)) {
                throw err;
            }
            content = "__missing__\n";
        }
        hash.update(content);
        hash.update("\n---\n");
    }
    return hash.digest("hex");
}
198
/**
 * Check whether a pipeline step can reuse its cached result.
 *
 * The step's current input hash is computed (with any `context` strings
 * mixed in) and compared with the hash stored in the step's manifest. A hit
 * requires a stored entry, an identical hash, and every recorded output
 * path still being present under `rootDir`.
 *
 * @returns `{ hit: true, entry }` on a hit, otherwise
 *   `{ hit: false, currentHash }` so the caller can record a new entry.
 */
export function lookupCache(rootDir, step, context) {
    const currentHash = hashFiles(getStepInputPaths(rootDir, step), context);
    const cached = readManifest(rootDir, step)?.entry;
    // No entry recorded yet, or the inputs changed since it was recorded.
    if (!cached || cached.inputHash !== currentHash) {
        return { currentHash, hit: false };
    }
    // Guard against outputs that were deleted by hand after caching.
    const outputMissing = cached.outputPaths.some((rel) => !existsSync(resolve(rootDir, rel)));
    if (outputMissing) {
        return { currentHash, hit: false };
    }
    return { entry: cached, hit: true };
}
225
/**
 * True when `value` is an object carrying every CacheEntry field with the
 * expected primitive type (string hash/summary/timestamp, numeric duration,
 * array of string output paths).
 */
function isValidCacheEntry(value) {
    return (typeof value === "object" &&
        value !== null &&
        typeof value.inputHash === "string" &&
        typeof value.summary === "string" &&
        typeof value.timestamp === "string" &&
        typeof value.durationMs === "number" &&
        Array.isArray(value.outputPaths) &&
        value.outputPaths.every((p) => typeof p === "string"));
}
/**
 * Read the cache manifest for a step.
 *
 * Returns null if no manifest exists or it's corrupt. "Corrupt" covers
 * unreadable or unparsable JSON, a top-level value that is not an object, a
 * `step` field that does not match the requested step, and a non-null
 * `entry` that does not have the CacheEntry shape — so callers can rely on
 * `entry.inputHash` and `entry.outputPaths` being well-formed.
 *
 * @param rootDir - Package root containing `results/cache/`.
 * @param step - Step name whose manifest should be read.
 * @returns The parsed manifest, or null when absent or invalid.
 */
export function readManifest(rootDir, step) {
    const manifestPath = getManifestPath(rootDir, step);
    if (!existsSync(manifestPath))
        return null;
    let parsed;
    try {
        parsed = JSON.parse(readFileSync(manifestPath, "utf-8"));
    }
    catch {
        return null;
    }
    if (typeof parsed !== "object" || parsed === null || parsed.step !== step)
        return null;
    // A manifest may legitimately hold no entry; anything else must be a
    // complete entry, otherwise treat the file as corrupt (cache miss).
    if (parsed.entry != null && !isValidCacheEntry(parsed.entry))
        return null;
    return parsed;
}
244
+ // ---------------------------------------------------------------------------
245
+ // Cache operations
246
+ // ---------------------------------------------------------------------------
247
/**
 * Persist a cache entry for a step that has just finished successfully.
 * The entry is stamped with the current time (ISO 8601) and handed to
 * `writeManifest`, which stores it as the step's manifest.
 */
export function recordCache(rootDir, step, inputHash, summary, durationMs, outputPaths) {
    const timestamp = new Date().toISOString();
    writeManifest(rootDir, step, {
        durationMs,
        inputHash,
        outputPaths,
        summary,
        timestamp,
    });
}
260
/**
 * Serialize `{ entry, step }` as pretty-printed JSON and write it to the
 * step's manifest file, creating the cache directory first when it is
 * missing.
 */
export function writeManifest(rootDir, step, entry) {
    const dir = getCacheDir(rootDir);
    if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
    }
    const serialized = JSON.stringify({ entry, step }, null, 2);
    writeFileSync(getManifestPath(rootDir, step), serialized);
}
273
+ // ---------------------------------------------------------------------------
274
+ // Manifest paths
275
+ // ---------------------------------------------------------------------------
276
/** Resolve the cache directory (`results/<CACHE_DIR_NAME>`) under `rootDir`. */
function getCacheDir(rootDir) {
    const segments = ["results", CACHE_DIR_NAME];
    return resolve(rootDir, ...segments);
}
280
/** Build the path of a step's manifest: `<cache dir>/<step>.json`. */
function getManifestPath(rootDir, step) {
    const fileName = `${step}.json`;
    return join(getCacheDir(rootDir), fileName);
}
@@ -0,0 +1,102 @@
1
+ import { type ResolvedSourceConfig } from "../sources.js";
2
+ import { type ActualScoreEntry, type ComponentResult } from "../_vendor/ailf-core/index.d.ts";
3
+ import type { GraderJudgment, PerModelEntry } from "./types.js";
4
+ export { classifyRubric, detectFeatureArea, extractUrlMetadata, mergeScores, parseRubricScore, } from "../_vendor/ailf-core/index.d.ts";
5
+ export type { ActualScoreEntry, ComponentResult, TestResult, UrlMetadata, } from "../_vendor/ailf-core/index.d.ts";
6
+ export interface PromptfooResultsWrapper {
7
+ results: RawTestResult[];
8
+ stats: {
9
+ successes: number;
10
+ failures: number;
11
+ tokenUsage?: {
12
+ assertions?: {
13
+ completion: number;
14
+ prompt: number;
15
+ total: number;
16
+ };
17
+ completion: number;
18
+ prompt: number;
19
+ total: number;
20
+ };
21
+ };
22
+ }
23
+ export interface RawPromptfooFile {
24
+ [key: string]: unknown;
25
+ config?: {
26
+ defaultTest?: {
27
+ options?: {
28
+ provider?: string;
29
+ rubricProvider?: string;
30
+ };
31
+ };
32
+ };
33
+ results: PromptfooResultsWrapper;
34
+ }
35
+ export interface RawTestResult {
36
+ cost?: number;
37
+ error?: string;
38
+ gradingResult: null | {
39
+ componentResults: ComponentResult[];
40
+ pass: boolean;
41
+ };
42
+ metadata?: Record<string, unknown>;
43
+ provider?: {
44
+ id?: string;
45
+ label?: string;
46
+ };
47
+ response: {
48
+ output: string;
49
+ };
50
+ testCase?: {
51
+ description?: string;
52
+ vars?: Record<string, string>;
53
+ };
54
+ vars: Record<string, string>;
55
+ }
56
+ /**
57
+ * Calculate scores grouped by model. Each model gets its own FeatureScore[]
58
+ * and model-level aggregates.
59
+ *
60
+ * Uses the provider.id from Promptfoo results to identify models.
61
+ * Falls back to provider.label, then "unknown" if neither is available.
62
+ *
63
+ * @returns Array of per-model entries (`PerModelEntry[]`), or null if only one model was used
64
+ * (per-model breakdown is redundant when there's only one model).
65
+ */
66
+ export declare function calculateScoresPerModel(resultsPath: string, weights: Record<string, number>): null | PerModelEntry[];
67
+ /**
68
+ * Extract grader judgments (reason text + scores) from evaluation results.
69
+ *
70
+ * This preserves the grader's natural language reasoning for downstream
71
+ * analysis (failure mode classification, gap analysis). Each llm-rubric
72
+ * assertion produces one GraderJudgment entry.
73
+ *
74
+ * Phase 3a prerequisite: structured judgment data for failure mode extraction.
75
+ */
76
+ export declare function extractGraderJudgments(resultsPath: string): GraderJudgment[];
77
+ /**
78
+ * Score agentic evaluation results. In agentic mode, all test entries are
79
+ * gold-only (no baseline entries — the .expanded.agentic.yaml fix ensures this).
80
+ * The model retrieves docs via tools, so all results map to "actual" scores.
81
+ *
82
+ * Returns a record keyed by feature area with the composite actual score.
83
+ */
84
+ export declare function scoreAgenticResults(resultsPath: string, weights: Record<string, number>): Record<string, ActualScoreEntry>;
85
+ /** Options for the calculate-scores main() function. */
86
+ export interface CalculateScoresOptions {
87
+ /** Allowed origins for source isolation reporting */
88
+ allowedOrigins?: string[];
89
+ /** Evaluation mode (controls which result files are read) */
90
+ mode?: string;
91
+ /** Pre-resolved source config (skips loadSource() call) */
92
+ resolvedSource?: ResolvedSourceConfig;
93
+ /** Path to baseline results file (default: results/latest/eval-results.json) */
94
+ resultsPath?: string;
95
+ /** Root directory of the eval package (required) */
96
+ rootDir: string;
97
+ /** Search mode for source verification metadata */
98
+ searchMode?: string;
99
+ /** Documentation source name */
100
+ source?: string;
101
+ }
102
+ export declare function calculateAndWriteScores(options: CalculateScoresOptions): void;