@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530)
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,108 @@
1
+ /**
2
+ * @sanity/ailf-core — Sink schemas
3
+ *
4
+ * Zod schemas for runtime validation of config/sinks.yaml — the configuration
5
+ * file for report delivery sinks.
6
+ *
7
+ * Each sink type has its own config schema, and the top-level SinksFileSchema
8
+ * validates the complete YAML structure. The discriminated union on `type`
9
+ * ensures type-safe parsing with clear error messages for unknown sink types.
10
+ *
11
+ * Usage:
12
+ * import { SinksFileSchema } from "./schema.js"
13
+ * const parsed = SinksFileSchema.parse(rawYaml)
14
+ *
15
+ * @see docs/design-docs/report-store/sink-architecture.md
16
+ */
17
+ import { z } from "zod";
18
+ // ---------------------------------------------------------------------------
19
+ // Sink type enum
20
+ // ---------------------------------------------------------------------------
21
+ /** All supported sink types, as a Zod enum of string literals. */
22
+ export const SinkTypeSchema = z.enum([
23
+ "bigquery",
24
+ "github-comment",
25
+ "slack",
26
+ "webhook",
27
+ ]);
28
+ // ---------------------------------------------------------------------------
29
+ // Routing rules — severity-aware delivery targeting (Phase 5c)
30
+ // ---------------------------------------------------------------------------
31
+ /**
32
+ * Schema for sink routing rules.
33
+ * Controls which messages are delivered based on severity and type.
34
+ * When omitted, the sink receives all reports (backward compatible).
+ * Every rule is optional and accepts either a boolean or a string.
+ * NOTE(review): what a string value means is not defined in this file —
+ * confirm against the code that consumes the routing rules.
35
+ */
36
+ export const SinkRoutingSchema = z.object({
37
+ /** Deliver on critical threshold violations */
38
+ critical: z.union([z.boolean(), z.string()]).optional(),
39
+ /** Deliver weekly digest messages */
40
+ digest: z.union([z.boolean(), z.string()]).optional(),
41
+ /** Deliver on info-level threshold violations */
42
+ info: z.union([z.boolean(), z.string()]).optional(),
43
+ /** Deliver on regression detection */
44
+ regression: z.union([z.boolean(), z.string()]).optional(),
45
+ /** Deliver on warning-level threshold violations */
46
+ warning: z.union([z.boolean(), z.string()]).optional(),
47
+ });
48
+ /** Inferred TypeScript type for sink routing rules (type-only: no code is emitted for it in this compiled JavaScript). */
49
+ // ---------------------------------------------------------------------------
50
+ // Individual sink config schemas
51
+ // ---------------------------------------------------------------------------
52
+ /** Config schema for a generic webhook sink: `url` must be a valid URL, `enabled` defaults to false, and `headers` (string keys to string values) and `routing` are optional. */
53
+ export const WebhookSinkConfigSchema = z.object({
54
+ enabled: z.boolean().default(false),
55
+ headers: z.record(z.string(), z.string()).optional(),
56
+ routing: SinkRoutingSchema.optional(),
57
+ type: z.literal("webhook"),
58
+ url: z.string().url(),
59
+ });
60
+ // ---------------------------------------------------------------------------
61
+ // Individual sink config schemas (continued): Slack, GitHub comment, BigQuery
62
+ // ---------------------------------------------------------------------------
63
+ /** Config schema for a Slack notification sink: `webhookUrl` must be a valid URL, `enabled` defaults to false, and `channel` and `routing` are optional. */
64
+ export const SlackSinkConfigSchema = z.object({
65
+ channel: z.string().optional(),
66
+ enabled: z.boolean().default(false),
67
+ routing: SinkRoutingSchema.optional(),
68
+ type: z.literal("slack"),
69
+ webhookUrl: z.string().url(),
70
+ });
71
+ /** Config schema for a GitHub PR comment sink: `enabled` defaults to false; `token` and `routing` are optional. */
72
+ export const GitHubCommentSinkConfigSchema = z.object({
73
+ enabled: z.boolean().default(false),
74
+ routing: SinkRoutingSchema.optional(),
75
+ token: z.string().optional(),
76
+ type: z.literal("github-comment"),
77
+ });
78
+ /** Config schema for a BigQuery sink: `dataset` and `project` are required strings, `enabled` defaults to false, and `credentials` and `routing` are optional. */
79
+ export const BigQuerySinkConfigSchema = z.object({
80
+ credentials: z.string().optional(),
81
+ dataset: z.string(),
82
+ enabled: z.boolean().default(false),
83
+ project: z.string(),
84
+ routing: SinkRoutingSchema.optional(),
85
+ type: z.literal("bigquery"),
86
+ });
87
+ // ---------------------------------------------------------------------------
88
+ // Discriminated union across all sink types
89
+ // ---------------------------------------------------------------------------
90
+ /**
91
+ * A single sink configuration — discriminated on the `type` field.
92
+ * Members: the BigQuery, GitHub comment, Slack, and webhook config schemas.
93
+ * Zod v4 discriminatedUnion provides clear errors when the `type` value
94
+ * doesn't match any known sink.
95
+ */
96
+ export const SinkConfigSchema = z.discriminatedUnion("type", [
97
+ BigQuerySinkConfigSchema,
98
+ GitHubCommentSinkConfigSchema,
99
+ SlackSinkConfigSchema,
100
+ WebhookSinkConfigSchema,
101
+ ]);
102
+ // ---------------------------------------------------------------------------
103
+ // Top-level file schema (config/sinks.yaml)
104
+ // ---------------------------------------------------------------------------
105
+ /** Schema for the complete config/sinks.yaml file; `sinks` is an array of sink configs that defaults to an empty array when the key is absent. */
106
+ export const SinksFileSchema = z.object({
107
+ sinks: z.array(SinkConfigSchema).default([]),
108
+ });
@@ -0,0 +1,18 @@
1
+ /**
2
+ * core/services/comparison-formatters.ts — Pure formatting functions for
3
+ * comparison reports.
4
+ *
5
+ * No I/O — takes a ComparisonReport, returns a string.
6
+ *
7
+ * Extracted from packages/eval/src/lib/compare.ts during the
8
+ * Ports & Adapters migration (Phase 4e).
9
+ */
10
+ import type { ComparisonReport } from "../types/index.js";
11
+ /**
12
+ * Generate a markdown comparison section suitable for PR comments.
13
+ */
14
+ export declare function formatComparisonMarkdown(report: ComparisonReport): string;
15
+ /**
16
+ * Generate a human-readable console comparison table.
17
+ */
18
+ export declare function formatComparisonTable(report: ComparisonReport): string;
@@ -0,0 +1,189 @@
1
+ /**
2
+ * core/services/comparison-formatters.ts — Pure formatting functions for
3
+ * comparison reports.
4
+ *
5
+ * No I/O — takes a ComparisonReport, returns a string.
6
+ *
7
+ * Extracted from packages/eval/src/lib/compare.ts during the
8
+ * Ports & Adapters migration (Phase 4e).
9
+ */
10
+ // ---------------------------------------------------------------------------
11
+ // Markdown format (for PR comments)
12
+ // ---------------------------------------------------------------------------
13
/**
 * Generate a markdown comparison section suitable for PR comments.
 *
 * The output contains, in order: a heading with the rounded overall
 * baseline → experiment scores, a per-area table, a one-line tally of
 * improved / regressed / unchanged areas, and a collapsible table of
 * dimension deltas (with a cost row when `report.deltas.cost` is defined).
 *
 * @param report Comparison report to render.
 * @returns The markdown text, with lines joined by "\n".
 */
export function formatComparisonMarkdown(report) {
    const lines = [];
    const overall = report.deltas.overall;
    // An overall delta within ±noiseThreshold is shown as "unchanged".
    const overallIcon = changeIcon(overall > report.noiseThreshold
        ? "improved"
        : overall < -report.noiseThreshold
            ? "regressed"
            : "unchanged");
    lines.push("### 📊 Score Comparison");
    lines.push("");
    lines.push(`**Overall: ${Math.round(report.baseline.overall.avgScore)} → ${Math.round(report.experiment.overall.avgScore)}** (${overallIcon} ${deltaStr(overall)})`);
    lines.push("");
    // Per-area table
    lines.push("| Feature | Baseline | Current | Delta | Task | Code | Docs |");
    lines.push("|---------|----------|---------|-------|------|------|------|");
    for (const a of report.areas) {
        const icon = changeIcon(a.change);
        lines.push(`| ${a.area} | ${a.baseline} | ${a.experiment} | ${icon} ${deltaStr(a.delta)} | ${deltaStr(a.dimensions.taskCompletion.delta)} | ${deltaStr(a.dimensions.codeCorrectness.delta)} | ${deltaStr(a.dimensions.docCoverage.delta)} |`);
    }
    lines.push("");
    // Summary: only non-empty classifications are mentioned
    const parts = [];
    if (report.improved.length > 0) {
        parts.push(`📈 ${report.improved.length} improved`);
    }
    if (report.regressed.length > 0) {
        parts.push(`📉 ${report.regressed.length} regressed`);
    }
    if (report.unchanged.length > 0) {
        parts.push(`➡️ ${report.unchanged.length} unchanged`);
    }
    if (parts.length > 0) {
        lines.push(parts.join(" · "));
        lines.push("");
    }
    // Dimension averages in collapsible
    lines.push("<details>");
    lines.push("<summary>Dimension averages</summary>");
    lines.push("");
    const dim = report.deltas.perDimension;
    lines.push("| Dimension | Delta |");
    lines.push("|-----------|-------|");
    lines.push(`| Task Completion | ${deltaStr(dim.taskCompletion)} |`);
    lines.push(`| Code Correctness | ${deltaStr(dim.codeCorrectness)} |`);
    lines.push(`| Doc Coverage | ${deltaStr(dim.docCoverage)} |`);
    lines.push(`| Doc Lift | ${deltaStr(report.deltas.docLift)} |`);
    if (report.deltas.cost !== undefined) {
        const cost = report.deltas.cost;
        const magnitude = Math.abs(cost).toFixed(4);
        // The sign precedes the currency symbol; a delta that displays as
        // zero carries no sign (previously rendered as "-$0.0000").
        const showsZero = Number(magnitude) === 0;
        const sign = showsZero ? "" : cost > 0 ? "+" : cost < 0 ? "-" : "";
        lines.push(`| Cost | ${sign}$${magnitude} |`);
    }
    lines.push("");
    lines.push("</details>");
    lines.push("");
    return lines.join("\n");
}
73
+ // ---------------------------------------------------------------------------
74
+ // Console table format
75
+ // ---------------------------------------------------------------------------
76
/**
 * Generate a human-readable console comparison table.
 *
 * Sections, in order: banner, overall score line, dimension deltas (plus a
 * cost delta when `report.deltas.cost` is defined), per-area breakdown,
 * classification summary, area mismatches (only when any exist), the noise
 * threshold, and — when at least one area has a `ceilingDelta` — the ceiling
 * decomposition table.
 *
 * @param report Comparison report to render.
 * @returns The report text, with lines joined by "\n".
 */
export function formatComparisonTable(report) {
    const lines = [];
    lines.push("=".repeat(80));
    lines.push(" COMPARISON REPORT");
    lines.push("=".repeat(80));
    lines.push("");
    // Overall summary: a delta within ±noiseThreshold is shown as "unchanged"
    const overall = report.deltas.overall;
    const overallIcon = changeIcon(overall > report.noiseThreshold
        ? "improved"
        : overall < -report.noiseThreshold
            ? "regressed"
            : "unchanged");
    lines.push(` Overall: ${Math.round(report.baseline.overall.avgScore)} → ${Math.round(report.experiment.overall.avgScore)} (${overallIcon} ${deltaStr(overall)})`);
    lines.push("");
    // Per-dimension averages
    const dim = report.deltas.perDimension;
    lines.push(" Dimension averages:");
    lines.push(` Task Completion: ${deltaStr(dim.taskCompletion)}`);
    lines.push(` Code Correctness: ${deltaStr(dim.codeCorrectness)}`);
    lines.push(` Doc Coverage: ${deltaStr(dim.docCoverage)}`);
    lines.push(` Doc Lift: ${deltaStr(report.deltas.docLift)}`);
    if (report.deltas.cost !== undefined) {
        const cost = report.deltas.cost;
        const magnitude = Math.abs(cost).toFixed(4);
        // The sign precedes the currency symbol (previously a negative delta
        // rendered as "$-0.0050"); a delta that displays as zero has no sign.
        const showsZero = Number(magnitude) === 0;
        const sign = showsZero ? "" : cost > 0 ? "+" : cost < 0 ? "-" : "";
        lines.push(` Cost: ${sign}$${magnitude}`);
    }
    lines.push("");
    // Per-area table
    lines.push("-".repeat(80));
    lines.push("PER-AREA BREAKDOWN");
    lines.push("-".repeat(80));
    lines.push("");
    const h = "| Feature Area | Baseline | Experiment | Delta | Task | Code | Docs |";
    const sep = "|---------------------|----------|------------|-------|------|------|------|";
    lines.push(h);
    lines.push(sep);
    for (const a of report.areas) {
        const icon = changeIcon(a.change);
        lines.push(`| ${icon} ${a.area.padEnd(17)} | ${String(a.baseline).padStart(8)} | ${String(a.experiment).padStart(10)} | ${deltaStr(a.delta).padStart(5)} | ${deltaStr(a.dimensions.taskCompletion.delta).padStart(4)} | ${deltaStr(a.dimensions.codeCorrectness.delta).padStart(4)} | ${deltaStr(a.dimensions.docCoverage.delta).padStart(4)} |`);
    }
    lines.push("");
    // Classification summary: only non-empty classifications are listed
    if (report.improved.length > 0) {
        lines.push(` 📈 Improved: ${report.improved.join(", ")}`);
    }
    if (report.regressed.length > 0) {
        lines.push(` 📉 Regressed: ${report.regressed.join(", ")}`);
    }
    if (report.unchanged.length > 0) {
        lines.push(` ➡️ Unchanged: ${report.unchanged.join(", ")}`);
    }
    lines.push("");
    // Mismatched areas
    if (report.mismatched.onlyInBaseline.length > 0 ||
        report.mismatched.onlyInExperiment.length > 0) {
        lines.push(" ⚠️ Area mismatches:");
        if (report.mismatched.onlyInBaseline.length > 0) {
            lines.push(` Only in baseline: ${report.mismatched.onlyInBaseline.join(", ")}`);
        }
        if (report.mismatched.onlyInExperiment.length > 0) {
            lines.push(` Only in experiment: ${report.mismatched.onlyInExperiment.join(", ")}`);
        }
        lines.push("");
    }
    // The threshold is labelled "empirical" only when the report explicitly
    // carries noiseThresholdEmpirical === true; otherwise "default".
    const isEmpirical = "noiseThresholdEmpirical" in report &&
        report.noiseThresholdEmpirical === true;
    const thresholdSource = isEmpirical
        ? "empirical, from grader consistency data"
        : "default";
    lines.push(` Noise threshold: ±${report.noiseThreshold}${Number.isInteger(report.noiseThreshold) ? "" : ` (${report.noiseThreshold.toFixed(1)})`} (${thresholdSource})`);
    lines.push("");
    // Ceiling decomposition deltas (when areas have ceiling data)
    const hasCeilingData = report.areas.some((a) => a.ceilingDelta !== undefined);
    if (hasCeilingData) {
        lines.push("-".repeat(80));
        lines.push("CEILING DECOMPOSITION DELTAS");
        lines.push("-".repeat(80));
        lines.push("");
        const cH = "| Feature Area | Ceiling Δ | Floor Δ | Doc Lift Δ |";
        const cSep = "|---------------------|-----------|---------|------------|";
        lines.push(cH);
        lines.push(cSep);
        for (const a of report.areas) {
            lines.push(`| ${a.area.padEnd(19)} | ` +
                `${deltaStr(a.ceilingDelta).padStart(9)} | ` +
                `${deltaStr(a.floorDelta).padStart(7)} | ` +
                `${deltaStr(a.docLiftDelta).padStart(10)} |`);
        }
        lines.push("");
    }
    return lines.join("\n");
}
170
+ // ---------------------------------------------------------------------------
171
+ // Helpers (private)
172
+ // ---------------------------------------------------------------------------
173
/**
 * Pick the emoji shown next to a change classification.
 *
 * @param change "improved" or "regressed"; any other value is treated as
 *   unchanged.
 * @returns "📈" for improved, "📉" for regressed, otherwise "➡️".
 */
function changeIcon(change) {
    if (change === "improved") {
        return "📈";
    }
    if (change === "regressed") {
        return "📉";
    }
    return "➡️";
}
183
/**
 * Format a score delta as a signed whole number.
 *
 * The value is rounded first and the sign is taken from the rounded result,
 * so a small positive delta such as 0.4 renders as "0" rather than "+0",
 * matching how a small negative delta such as -0.4 already rendered.
 *
 * @param d Delta to format. A value that does not round to a non-zero number
 *   (0, undefined, NaN) yields "0".
 * @returns "+N" for positive, "-N" for negative, "0" otherwise.
 */
function deltaStr(d) {
    const rounded = Math.round(d);
    if (rounded > 0)
        return `+${rounded}`;
    if (rounded < 0)
        return `${rounded}`;
    // Covers 0, -0 and NaN (e.g. Math.round(undefined)).
    return "0";
}
@@ -0,0 +1,41 @@
1
+ /**
2
+ * core/services/config-helpers.ts — Pure config generation helpers.
3
+ *
4
+ * No I/O — operates on typed inputs and returns typed outputs.
5
+ *
6
+ * Extracted from packages/eval/src/lib/generate-configs.ts during
7
+ * the Ports & Adapters migration (Phase 4e).
8
+ */
9
+ import type { ModelEntry } from "../types/index.js";
10
+ /**
11
+ * Extract the raw API model name from a Promptfoo provider ID.
12
+ *
13
+ * Promptfoo IDs encode the provider + sub-protocol + model, e.g.:
14
+ * - "openai:chat:gpt-5.2" → "gpt-5.2"
15
+ * - "anthropic:messages:claude-opus-4-6" → "claude-opus-4-6"
16
+ * - "openrouter:deepseek/deepseek-r1" → "deepseek/deepseek-r1"
17
+ * - "gpt-4o" → "gpt-4o"
18
+ */
19
+ export declare function extractModelName(id: string): string;
20
+ /**
21
+ * Extract the LLM provider family from a Promptfoo provider ID.
22
+ *
23
+ * - "openai:chat:gpt-5.2" → "openai"
24
+ * - "anthropic:messages:claude-opus-4-6" → "anthropic"
25
+ * - "google:gemini-2.5-pro" → "google"
26
+ * - "gpt-4o" → "openai" (default)
27
+ */
28
+ export declare function extractProvider(id: string): string;
29
+ /**
30
+ * Merge default, model-specific, and mode-specific config into a single object.
31
+ *
32
+ * Only picks scalar defaults (temperature, max_tokens). Model and mode
33
+ * overrides are applied on top.
34
+ */
35
+ export declare function mergeConfig(defaults: Record<string, unknown>, modelConfig?: Record<string, unknown>, overrides?: Record<string, unknown>): Record<string, unknown>;
36
+ /**
37
+ * Check whether a model entry matches a given evaluation mode.
38
+ *
39
+ * Models without a `modes` field match all modes.
40
+ */
41
+ export declare function modelMatchesMode(model: ModelEntry, mode: string): boolean;
@@ -0,0 +1,86 @@
1
+ /**
2
+ * core/services/config-helpers.ts — Pure config generation helpers.
3
+ *
4
+ * No I/O — operates on typed inputs and returns typed outputs.
5
+ *
6
+ * Extracted from packages/eval/src/lib/generate-configs.ts during
7
+ * the Ports & Adapters migration (Phase 4e).
8
+ */
9
/**
 * Promptfoo provider prefixes recognised by extractModelName, tried in
 * array order. Each sub-protocol form (e.g. "openai:chat:") is listed ahead
 * of its bare provider form ("openai:") so the more specific prefix wins.
 */
const PROVIDER_PREFIXES = [
    "anthropic:messages:",
    "openai:chat:",
    "openai:responses:",
    "openai:",
    "anthropic:",
    "google:",
];
/**
 * Extract the raw API model name from a Promptfoo provider ID.
 *
 * A known prefix from PROVIDER_PREFIXES is removed when present. Otherwise
 * everything up to and including the first colon is dropped, and an ID with
 * no colon is returned unchanged:
 * - "openai:chat:gpt-5.2" → "gpt-5.2"
 * - "anthropic:messages:claude-opus-4-6" → "claude-opus-4-6"
 * - "openrouter:deepseek/deepseek-r1" → "deepseek/deepseek-r1"
 * - "gpt-4o" → "gpt-4o"
 */
export function extractModelName(id) {
    const knownPrefix = PROVIDER_PREFIXES.find((candidate) => id.startsWith(candidate));
    if (knownPrefix !== undefined) {
        return id.slice(knownPrefix.length);
    }
    // Unknown provider: drop only the first colon-separated segment.
    const firstColon = id.indexOf(":");
    return firstColon === -1 ? id : id.slice(firstColon + 1);
}
40
/**
 * Extract the LLM provider family from a Promptfoo provider ID.
 *
 * The provider is the text before the first colon. An ID with no colon, or
 * one that starts with a colon, falls back to "openai":
 * - "openai:chat:gpt-5.2" → "openai"
 * - "anthropic:messages:claude-opus-4-6" → "anthropic"
 * - "google:gemini-2.5-pro" → "google"
 * - "gpt-4o" → "openai" (default)
 */
export function extractProvider(id) {
    const separatorAt = id.indexOf(":");
    if (separatorAt <= 0) {
        return "openai";
    }
    return id.substring(0, separatorAt);
}
52
/**
 * Merge default, model-specific, and mode-specific config into a single object.
 *
 * From `defaults`, only non-object values (and null) are copied, so nested
 * objects and arrays are left out. `modelConfig` and then `overrides` are
 * copied over the result in full, so later layers win on key collisions.
 *
 * @param defaults Base config; only its scalar entries are used.
 * @param modelConfig Optional model-specific config, applied second.
 * @param overrides Optional mode-specific config, applied last.
 * @returns A new object; none of the inputs is modified.
 */
export function mergeConfig(defaults, modelConfig, overrides) {
    const merged = {};
    Object.entries(defaults)
        .filter(([, value]) => value === null || typeof value !== "object")
        .forEach(([key, value]) => {
            merged[key] = value;
        });
    // Layers are applied in order; missing layers are skipped.
    for (const layer of [modelConfig, overrides]) {
        if (layer) {
            Object.assign(merged, layer);
        }
    }
    return merged;
}
76
/**
 * Check whether a model entry matches a given evaluation mode.
 *
 * A model with no `modes` field, or an empty one, matches every mode;
 * otherwise `mode` must appear in `model.modes`.
 */
export function modelMatchesMode(model, mode) {
    const declaredModes = model.modes;
    const unrestricted = !declaredModes || declaredModes.length === 0;
    return unrestricted ? true : declaredModes.includes(mode);
}
@@ -0,0 +1,12 @@
1
+ /**
2
+ * @sanity/ailf-core — Domain services
3
+ *
4
+ * Pure computation functions with zero I/O. These operate on typed
5
+ * inputs and return typed outputs.
6
+ *
7
+ * Extracted from packages/eval/src/lib/ during the Ports & Adapters
8
+ * migration (Phase 4e).
9
+ */
10
+ export { classifyRubric, detectFeatureArea, extractUrlMetadata, mergeScores, parseRubricScore, } from "./scoring.js";
11
+ export { formatComparisonMarkdown, formatComparisonTable, } from "./comparison-formatters.js";
12
+ export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, } from "./config-helpers.js";
@@ -0,0 +1,12 @@
1
+ /**
2
+ * @sanity/ailf-core — Domain services
3
+ *
4
+ * Pure computation functions with zero I/O. These operate on typed
5
+ * inputs and return typed outputs.
6
+ *
7
+ * Extracted from packages/eval/src/lib/ during the Ports & Adapters
8
+ * migration (Phase 4e).
9
+ */
10
+ export { classifyRubric, detectFeatureArea, extractUrlMetadata, mergeScores, parseRubricScore, } from "./scoring.js";
11
+ export { formatComparisonMarkdown, formatComparisonTable, } from "./comparison-formatters.js";
12
+ export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, } from "./config-helpers.js";
@@ -0,0 +1,49 @@
1
+ /**
2
+ * core/services/scoring.ts — Pure scoring functions.
3
+ *
4
+ * These functions operate on typed inputs and produce typed outputs
5
+ * with zero I/O (no filesystem, network, or console access).
6
+ *
7
+ * Extracted from packages/eval/src/lib/calculate-scores.ts during
8
+ * the Ports & Adapters migration (Phase 4e).
9
+ */
10
+ import type { FeatureScore } from "../types/index.js";
11
+ import type { ActualScoreEntry, ComponentResult, TestResult, UrlMetadata } from "../types/scoring-input.js";
12
+ /**
13
+ * Classify a grading component into a scoring dimension.
14
+ *
15
+ * Prefers structured metadata (Approach 5) over heuristic string matching.
16
+ * Returns null if the component doesn't map to a known dimension.
17
+ */
18
+ export declare function classifyRubric(component: ComponentResult): "codeCorrectness" | "docCoverage" | "taskCompletion" | null;
19
+ /**
20
+ * Detect the feature area from a test description string.
21
+ *
22
+ * Uses keyword matching on the lowercased description.
23
+ */
24
+ export declare function detectFeatureArea(description: string): string;
25
+ /**
26
+ * Extract a numeric score (0–100) from a grading component result.
27
+ *
28
+ * Tries: direct score field → JSON-parsed reason → bare number in reason.
29
+ */
30
+ export declare function parseRubricScore(component: ComponentResult): number;
31
+ /**
32
+ * Extract URL metadata from a test result's JavaScript assertion output.
33
+ *
34
+ * Returns null if the test doesn't contain URL extraction data.
35
+ */
36
+ export declare function extractUrlMetadata(test: TestResult): null | UrlMetadata;
37
+ /**
38
+ * Merge baseline FeatureScore[] with agentic actual scores to produce
39
+ * the full three-layer decomposition.
40
+ *
41
+ * The merge is per feature area. For each area:
42
+ * - If baseline data exists: floor, ceiling, docLift, docQualityGap are populated
43
+ * - If agentic data exists: actualScore is populated
44
+ * - If both exist: retrievalGap and infrastructureEfficiency are computed
45
+ *
46
+ * @param baselineScores Floor/ceiling scores from baseline evaluation (may be empty)
47
+ * @param agenticScores Actual scores from agentic evaluation (may be empty)
48
+ */
49
+ export declare function mergeScores(baselineScores: FeatureScore[], agenticScores: Record<string, ActualScoreEntry>): FeatureScore[];