@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530) hide show
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,348 @@
1
+ /**
2
+ * pipeline/failure-modes.ts
3
+ *
4
+ * Keyword-based failure mode classifier for grader reasoning text,
5
+ * cross-referenced with ceiling decomposition data.
6
+ *
7
+ * Phase 3a of the Scenario Matrix implementation.
8
+ *
9
+ * The classifier uses two signal sources:
10
+ * 1. Keyword matching on grader reason text (primary)
11
+ * 2. Ceiling decomposition structural signals (supplementary)
12
+ *
13
+ * When both sources agree, confidence is boosted. When only ceiling
14
+ * signals are available, they serve as a fallback for unclassified cases.
15
+ *
16
+ * @see docs/exec-plans/completed/scenario-matrix-implementation/phase-3-gap-analysis.md
17
+ */
18
+ import { detectFeatureArea } from "../_vendor/ailf-core/index.js";
19
+ // ---------------------------------------------------------------------------
20
+ // Constants
21
+ // ---------------------------------------------------------------------------
22
/**
 * Score cut-off for failure analysis: a judgment whose score is at or above
 * this value is returned as "unclassified" without further inspection.
 */
const CLASSIFICATION_THRESHOLD = 60;
/**
 * Every failure mode key. The formatters and `findTopMode` iterate this list,
 * so its order determines table row order and tie-breaking.
 */
const ALL_MODES = ["incorrect-docs", "missing-docs", "model-limitation", "outdated-docs", "poor-structure", "unclassified"];
33
+ // ---------------------------------------------------------------------------
34
+ // Keyword patterns
35
+ // ---------------------------------------------------------------------------
36
/** Phrases suggesting the documentation describes an API that has since changed. */
const OUTDATED_PATTERN = /deprecated|old api|v[0-9]+ syntax|no longer supported|legacy|previous version|outdated|superseded|replaced by/i;
/**
 * Phrases suggesting the needed documentation does not exist.
 *
 * The "no … information" alternative requires "no" to be a whole word;
 * otherwise "not", "unknown", "cannot", etc. followed anywhere by
 * "information" would be reported as missing documentation.
 */
const MISSING_PATTERN = /no documentation|not covered|had to guess|not found|missing.*doc|\bno\b.*information|undocumented|couldn't find|without.*documentation/i;
/** Phrases suggesting the documentation states something factually wrong. */
const INCORRECT_PATTERN = /contradicts|incorrect.*doc|doc.*incorrect|wrong.*doc|doc.*wrong|documentation says.*but|factual error|inaccurate|misleading.*doc/i;
/** Phrases suggesting the documentation exists but is hard to use. */
const POOR_STRUCTURE_PATTERN = /unclear|ambiguous|couldn't determine|conflicting|confusing|hard to follow|poorly organized|scattered|fragmented/i;
40
+ // ---------------------------------------------------------------------------
41
+ // Public API
42
+ // ---------------------------------------------------------------------------
43
/**
 * Classify every grader judgment and aggregate the outcome into one report.
 *
 * Each judgment is mapped to a feature area (when `resolveArea` finds one),
 * classified with `classifyFailureMode`, and counted both globally and in
 * that area's bucket.
 *
 * @param judgments - Grader judgments to classify
 * @param scores - Per-area feature scores; `feature` is used as the area key
 *   and `ceilingScore` / `floorScore` are passed to the classifier
 * @returns Report with `byArea`, `classificationRate` (percentage of
 *   judgments whose mode is not "unclassified"), `classifiedJudgments`,
 *   `summary` and `totalJudgments`
 */
export function buildFailureModeReport(judgments, scores) {
    // Index the scores by area; a later entry for the same feature wins
    const scoreByArea = new Map();
    for (const entry of scores) {
        scoreByArea.set(entry.feature, entry);
    }
    const summary = initModeCounts();
    const byArea = {};
    const classifiedJudgments = [];
    for (const judgment of judgments) {
        const area = resolveArea(judgment.taskId, scoreByArea);
        const areaScore = area ? scoreByArea.get(area) : undefined;
        // NOTE(review): when no area (or no ceiling/floor value) is available
        // the classifier still receives ceiling=100 / floor=0 — confirm that
        // treating missing data as "perfect ceiling, zero floor" is intended.
        const classification = classifyFailureMode(judgment, areaScore?.ceilingScore ?? 100, areaScore?.floorScore ?? 0);
        classifiedJudgments.push({ classification, judgment });
        summary[classification.mode]++;
        // Judgments without a resolved area only contribute to the summary
        if (!area) {
            continue;
        }
        let bucket = byArea[area];
        if (!bucket) {
            bucket = {
                area,
                modes: initModeCounts(),
                topMode: "unclassified",
                totalJudgments: 0,
            };
            byArea[area] = bucket;
        }
        bucket.modes[classification.mode]++;
        bucket.totalJudgments++;
    }
    // The dominant mode can only be decided once all counts are in
    Object.values(byArea).forEach((bucket) => {
        bucket.topMode = findTopMode(bucket.modes);
    });
    const totalJudgments = judgments.length;
    const classifiedCount = totalJudgments - (summary["unclassified"] ?? 0);
    let classificationRate = 0;
    if (totalJudgments > 0) {
        classificationRate = (classifiedCount / totalJudgments) * 100;
    }
    return {
        byArea,
        classificationRate,
        classifiedJudgments,
        summary,
        totalJudgments,
    };
}
97
+ /**
98
+ * Classify the failure mode of a low-scoring grader judgment.
99
+ *
100
+ * Uses keyword matching on the reason text, then cross-references with
101
+ * ceiling decomposition data for structural confirmation.
102
+ *
103
+ * @param judgment - The grader judgment to classify
104
+ * @param ceilingScore - The area's ceiling score (with-docs best case)
105
+ * @param floorScore - The area's floor score (no-docs baseline)
106
+ * @returns Classified failure mode with confidence level
107
+ */
108
+ export function classifyFailureMode(judgment, ceilingScore, floorScore) {
109
+ // Passing scores don't need failure mode analysis
110
+ if (judgment.score >= CLASSIFICATION_THRESHOLD) {
111
+ return { confidence: "low", mode: "unclassified", source: "keyword" };
112
+ }
113
+ const reason = judgment.reason.toLowerCase();
114
+ // Step 1: Keyword-based classification
115
+ const keywordMode = classifyByKeyword(reason);
116
+ // Step 2: Ceiling-based structural classification
117
+ const ceilingMode = classifyByCeiling(judgment.score, ceilingScore, floorScore);
118
+ // Step 3: Combine signals
119
+ return combineClassifications(keywordMode, ceilingMode);
120
+ }
121
+ // ---------------------------------------------------------------------------
122
+ // Formatting
123
+ // ---------------------------------------------------------------------------
124
+ /**
125
+ * Format a failure mode report for console output.
126
+ */
127
+ export function formatFailureModesConsole(report) {
128
+ const lines = [];
129
+ lines.push("🔍 FAILURE MODE ANALYSIS");
130
+ lines.push("");
131
+ lines.push(` ${report.totalJudgments} judgments analyzed, ${report.classificationRate.toFixed(0)}% classified`);
132
+ lines.push("");
133
+ // Summary table
134
+ lines.push(" Mode Count");
135
+ lines.push(" ────────────────── ─────");
136
+ for (const mode of ALL_MODES) {
137
+ const count = report.summary[mode] ?? 0;
138
+ if (count > 0) {
139
+ const icon = modeIcon(mode);
140
+ lines.push(` ${icon} ${mode.padEnd(18)} ${count}`);
141
+ }
142
+ }
143
+ lines.push("");
144
+ // Per-area breakdown
145
+ if (Object.keys(report.byArea).length > 0) {
146
+ lines.push(" Per-area top failure modes:");
147
+ for (const [area, data] of Object.entries(report.byArea).sort(([a], [b]) => a.localeCompare(b))) {
148
+ const icon = modeIcon(data.topMode);
149
+ lines.push(` ${area}: ${icon} ${data.topMode} (${data.totalJudgments} judgments)`);
150
+ }
151
+ lines.push("");
152
+ }
153
+ return lines.join("\n");
154
+ }
155
+ /**
156
+ * Format a failure mode report as markdown for PR comments.
157
+ */
158
+ export function formatFailureModesMarkdown(report) {
159
+ const lines = [];
160
+ lines.push("### 🔍 Failure Mode Analysis");
161
+ lines.push("");
162
+ if (report.totalJudgments === 0) {
163
+ lines.push("No judgments analyzed.");
164
+ return lines.join("\n");
165
+ }
166
+ lines.push(`**${report.totalJudgments} judgments** analyzed, **${report.classificationRate.toFixed(0)}%** classified`);
167
+ lines.push("");
168
+ // Summary table
169
+ lines.push("| Mode | Count | % |");
170
+ lines.push("|------|-------|---|");
171
+ for (const mode of ALL_MODES) {
172
+ const count = report.summary[mode] ?? 0;
173
+ if (count > 0) {
174
+ const pct = report.totalJudgments > 0
175
+ ? ((count / report.totalJudgments) * 100).toFixed(0)
176
+ : "0";
177
+ const icon = modeIcon(mode);
178
+ lines.push(`| ${icon} ${mode} | ${count} | ${pct}% |`);
179
+ }
180
+ }
181
+ lines.push("");
182
+ // Per-area table
183
+ if (Object.keys(report.byArea).length > 0) {
184
+ lines.push("<details>");
185
+ lines.push("<summary>Per-area breakdown</summary>");
186
+ lines.push("");
187
+ lines.push("| Area | Top Mode | Judgments |");
188
+ lines.push("|------|----------|----------|");
189
+ for (const [area, data] of Object.entries(report.byArea).sort(([a], [b]) => a.localeCompare(b))) {
190
+ const icon = modeIcon(data.topMode);
191
+ lines.push(`| ${area} | ${icon} ${data.topMode} | ${data.totalJudgments} |`);
192
+ }
193
+ lines.push("");
194
+ lines.push("</details>");
195
+ lines.push("");
196
+ }
197
+ return lines.join("\n");
198
+ }
199
+ // ---------------------------------------------------------------------------
200
+ // Internal helpers
201
+ // ---------------------------------------------------------------------------
202
/**
 * Derive a failure mode from ceiling/floor scores alone.
 *
 * Rules are checked in order and the first match wins; `null` means the
 * scores carry no structural signal.
 *
 * @param score - The judgment's own score
 * @param ceilingScore - Area score with documentation available
 * @param floorScore - Area score without documentation
 * @returns A classification with source "ceiling", or `null`
 */
function classifyByCeiling(score, ceilingScore, floorScore) {
    const fromCeiling = (mode, confidence) => ({ confidence, mode, source: "ceiling" });
    // Docs lower the score compared to having no docs at all
    const docsHurt = ceilingScore - floorScore < 0;
    if (docsHurt) {
        return fromCeiling("outdated-docs", "medium");
    }
    // Docs allow a high score, yet this judgment is far below it
    const goodDocsUnused = ceilingScore > 70 && score < 40;
    if (goodDocsUnused) {
        return fromCeiling("model-limitation", "medium");
    }
    // The model already does well without docs and docs add under 10 points
    const docsAddLittle = floorScore > 50 && ceilingScore < floorScore + 10;
    if (docsAddLittle) {
        return fromCeiling("outdated-docs", "medium");
    }
    // Neither the docs nor the model's own knowledge is sufficient
    const bothWeak = ceilingScore < 40 && floorScore < 30;
    if (bothWeak) {
        return fromCeiling("missing-docs", "low");
    }
    return null;
}
227
/**
 * Derive a failure mode from the grader's reason text.
 *
 * Patterns are tried in a fixed priority (outdated, missing, incorrect,
 * poor structure); the first one that matches decides the result.
 *
 * @param reason - Grader reason text
 * @returns A classification with source "keyword", or `null` when no
 *   pattern matches
 */
function classifyByKeyword(reason) {
    const rules = [
        { pattern: OUTDATED_PATTERN, mode: "outdated-docs", confidence: "high" },
        { pattern: MISSING_PATTERN, mode: "missing-docs", confidence: "high" },
        { pattern: INCORRECT_PATTERN, mode: "incorrect-docs", confidence: "medium" },
        { pattern: POOR_STRUCTURE_PATTERN, mode: "poor-structure", confidence: "medium" },
    ];
    const hit = rules.find((rule) => rule.pattern.test(reason));
    if (!hit) {
        return null;
    }
    return { confidence: hit.confidence, mode: hit.mode, source: "keyword" };
}
243
/**
 * Merge the keyword and ceiling classifications into one result.
 *
 * - both present and same mode → high confidence, source "keyword+ceiling"
 * - keyword present (ceiling absent or disagreeing) → keyword result as-is
 * - only ceiling present → ceiling result as-is
 * - neither present → low-confidence "unclassified"
 *
 * @param keyword - Result of keyword matching, or `null`
 * @param ceiling - Result of ceiling analysis, or `null`
 * @returns The combined classification
 */
function combineClassifications(keyword, ceiling) {
    if (!keyword) {
        // Without a keyword hit the ceiling signal is the only fallback
        return ceiling
            ? ceiling
            : { confidence: "low", mode: "unclassified", source: "keyword" };
    }
    const signalsAgree = Boolean(ceiling) && keyword.mode === ceiling.mode;
    if (!signalsAgree) {
        // On disagreement the keyword result takes precedence
        return keyword;
    }
    return {
        confidence: "high",
        mode: keyword.mode,
        source: "keyword+ceiling",
    };
}
271
+ /**
272
+ * Resolve area name from a task ID or description.
273
+ *
274
+ * Task IDs in grader judgments use human-readable descriptions
275
+ * (e.g., "GROQ - Blog queries with filtering and pagination (gold)")
276
+ * while score areas use kebab-case slugs (e.g., "groq").
277
+ *
278
+ * Strategy:
279
+ * 1. Use detectFeatureArea() which handles human-readable descriptions
280
+ * 2. Fall back to prefix matching for kebab-case task IDs
281
+ * 3. Return undefined if no match is found
282
+ */
283
+ function resolveArea(taskId, scoreByArea) {
284
+ // Strategy 1: Use the shared feature area detector (handles descriptions)
285
+ const detected = detectFeatureArea(taskId);
286
+ if (detected !== "other" && scoreByArea.has(detected))
287
+ return detected;
288
+ // Strategy 2: Direct prefix match (kebab-case task IDs like "groq-blog-queries")
289
+ for (const area of scoreByArea.keys()) {
290
+ if (taskId.startsWith(area))
291
+ return area;
292
+ }
293
+ // Strategy 3: Progressive prefix matching on hyphens
294
+ const parts = taskId.split("-");
295
+ if (parts.length > 1) {
296
+ for (let i = parts.length - 1; i >= 1; i--) {
297
+ const candidate = parts.slice(0, i).join("-");
298
+ if (scoreByArea.has(candidate))
299
+ return candidate;
300
+ }
301
+ }
302
+ return undefined;
303
+ }
304
+ /** Find the most common failure mode */
305
+ function findTopMode(modes) {
306
+ let topMode = "unclassified";
307
+ let topCount = 0;
308
+ for (const mode of ALL_MODES) {
309
+ if (mode === "unclassified")
310
+ continue; // Prefer classified modes
311
+ if ((modes[mode] ?? 0) > topCount) {
312
+ topCount = modes[mode];
313
+ topMode = mode;
314
+ }
315
+ }
316
+ // If nothing classified, return unclassified
317
+ if (topCount === 0)
318
+ return "unclassified";
319
+ return topMode;
320
+ }
321
+ /** Initialize mode counts to zero */
322
+ function initModeCounts() {
323
+ return {
324
+ "incorrect-docs": 0,
325
+ "missing-docs": 0,
326
+ "model-limitation": 0,
327
+ "outdated-docs": 0,
328
+ "poor-structure": 0,
329
+ unclassified: 0,
330
+ };
331
+ }
332
+ /** Get icon for a failure mode */
333
+ function modeIcon(mode) {
334
+ switch (mode) {
335
+ case "incorrect-docs":
336
+ return "❌";
337
+ case "missing-docs":
338
+ return "📭";
339
+ case "model-limitation":
340
+ return "🤖";
341
+ case "outdated-docs":
342
+ return "📅";
343
+ case "poor-structure":
344
+ return "🏗️";
345
+ case "unclassified":
346
+ return "❓";
347
+ }
348
+ }
@@ -0,0 +1,44 @@
1
+ /**
2
+ * fetch-url-content.ts
3
+ *
4
+ * Fetches documentation content from a URL. Tries the .md endpoint first
5
+ * (Sanity's agent-friendly format), then content-type negotiation.
6
+ * Does NOT attempt HTML-to-Markdown conversion — if the endpoint
7
+ * doesn't serve markdown, the fetch fails cleanly.
8
+ *
9
+ * Part of Phase 4: Modular doc fetching for baseline mode.
10
+ */
11
/**
 * JSON-serializable summary of a single URL fetch.
 *
 * NOTE(review): the fields mirror `UrlFetchResult` without the content body;
 * `contentLength` presumably records the size of the fetched content —
 * confirm against the code that produces this record.
 */
export interface UrlFetchMetadata {
    /** Size of the fetched content, when available */
    contentLength?: number;
    /** Error message, when the fetch failed */
    error?: string;
    /** How the content was obtained (same values as `UrlFetchResult.method`) */
    method: UrlFetchResult["method"];
    /** HTTP status code, when a response was received */
    status?: number;
    /** The URL that was fetched */
    url: string;
}
19
/** Outcome of one attempt to fetch documentation content from a URL. */
export interface UrlFetchResult {
    /** Markdown text; left undefined when the fetch failed */
    content?: string;
    /** Description of what went wrong; set when the fetch failed */
    error?: string;
    /**
     * Which strategy produced the content:
     * the `.md` endpoint, content-type negotiation, or neither ("failed")
     */
    method: "content-negotiation" | "failed" | "md-endpoint";
    /** HTTP status code of the response, when there was one */
    status?: number;
    /** The URL that was requested */
    url: string;
}
32
/**
 * Retrieve documentation for a URL as markdown.
 *
 * Attempts, in order:
 * 1. The `.md` endpoint, requested with `Accept: text/markdown`
 * 2. Content-type negotiation on the URL itself (asking for markdown)
 * 3. A clean failure when only HTML is available — no conversion is done
 *
 * @param url - Documentation URL to fetch
 * @param headers - Optional custom headers merged with the defaults
 * @returns Promise resolving to a `UrlFetchResult` carrying either the
 *   content or failure metadata
 */
export declare function fetchUrlContent(url: string, headers?: Record<string, string>): Promise<UrlFetchResult>;
@@ -0,0 +1,93 @@
1
+ /**
2
+ * fetch-url-content.ts
3
+ *
4
+ * Fetches documentation content from a URL. Tries the .md endpoint first
5
+ * (Sanity's agent-friendly format), then content-type negotiation.
6
+ * Does NOT attempt HTML-to-Markdown conversion — if the endpoint
7
+ * doesn't serve markdown, the fetch fails cleanly.
8
+ *
9
+ * Part of Phase 4: Modular doc fetching for baseline mode.
10
+ */
11
+ /**
12
+ * Fetch documentation content from a URL.
13
+ *
14
+ * Strategy:
15
+ * 1. Try `.md` endpoint with `Accept: text/markdown`
16
+ * 2. Try content-type negotiation (ask for markdown)
17
+ * 3. Fail cleanly if only HTML is available (no conversion)
18
+ *
19
+ * @param url - The documentation URL to fetch
20
+ * @param headers - Optional custom headers to merge with defaults
21
+ * @returns A `UrlFetchResult` with success or failure metadata
22
+ */
23
+ export async function fetchUrlContent(url, headers) {
24
+ const cleanUrl = url.replace(/\/$/, "");
25
+ const mergedHeaders = {
26
+ ...headers,
27
+ "User-Agent": "SanityEvalBot/1.0",
28
+ };
29
+ // Strategy 1: Try .md endpoint
30
+ const mdUrl = cleanUrl.endsWith(".md") ? cleanUrl : `${cleanUrl}.md`;
31
+ try {
32
+ const response = await fetch(mdUrl, {
33
+ headers: { ...mergedHeaders, Accept: "text/markdown, text/plain" },
34
+ });
35
+ if (response.ok) {
36
+ const contentType = response.headers.get("content-type") ?? "";
37
+ const text = await response.text();
38
+ // Accept if content-type says markdown OR the content doesn't look like HTML
39
+ if (contentType.includes("markdown") ||
40
+ !text.trimStart().startsWith("<!DOCTYPE")) {
41
+ return {
42
+ content: text,
43
+ method: "md-endpoint",
44
+ status: response.status,
45
+ url,
46
+ };
47
+ }
48
+ }
49
+ }
50
+ catch {
51
+ // .md endpoint unavailable — try next strategy
52
+ }
53
+ // Strategy 2: Content-type negotiation (ask for markdown)
54
+ try {
55
+ const response = await fetch(cleanUrl, {
56
+ headers: { ...mergedHeaders, Accept: "text/markdown, text/plain" },
57
+ });
58
+ if (response.ok) {
59
+ const contentType = response.headers.get("content-type") ?? "";
60
+ const text = await response.text();
61
+ if (contentType.includes("markdown") ||
62
+ contentType.includes("text/plain")) {
63
+ return {
64
+ content: text,
65
+ method: "content-negotiation",
66
+ status: response.status,
67
+ url,
68
+ };
69
+ }
70
+ // Got HTML or something else — don't try to convert it
71
+ return {
72
+ error: `Endpoint returned ${contentType} instead of markdown. ` +
73
+ "The URL does not appear to support markdown content negotiation.",
74
+ method: "failed",
75
+ status: response.status,
76
+ url,
77
+ };
78
+ }
79
+ return {
80
+ error: `HTTP ${response.status}`,
81
+ method: "failed",
82
+ status: response.status,
83
+ url,
84
+ };
85
+ }
86
+ catch (err) {
87
+ return {
88
+ error: `Network error: ${err.message}`,
89
+ method: "failed",
90
+ url,
91
+ };
92
+ }
93
+ }
@@ -0,0 +1,48 @@
1
+ /**
2
+ * pipeline/gap-analysis.ts
3
+ *
4
+ * Expected impact estimation for identified gaps.
5
+ *
6
+ * Phase 3b of the Scenario Matrix implementation.
7
+ *
8
+ * Given failure modes and scores, estimates the score lift that fixing
9
+ * each gap would produce. Gaps are prioritized by estimated lift × task count
10
+ * to produce an actionable remediation plan.
11
+ *
12
+ * The estimation model is conservative: it assumes fixing a gap raises the
13
+ * bottleneck dimension to the median of non-bottlenecked dimensions (not 100).
14
+ * This produces realistic estimates rather than theoretical maximums.
15
+ *
16
+ * @see docs/exec-plans/completed/scenario-matrix-implementation/phase-3-gap-analysis.md
17
+ */
18
+ import type { FailureModeReport, FeatureScore, GapAnalysisReport, GapEstimate } from "./types.js";
19
/**
 * Produce the full gap analysis report.
 *
 * @param failureModeReport - Failure modes classified in Phase 3a
 * @param scores - Feature scores, one entry per area
 * @param weights - Optional dimension weights
 * @returns A `GapAnalysisReport` containing the prioritized remediation plan
 */
export declare function buildGapAnalysisReport(failureModeReport: FailureModeReport, scores: FeatureScore[], weights?: Record<string, number>): GapAnalysisReport;
28
/**
 * Estimate how much score each identified gap would recover if fixed.
 *
 * Every area that has failure modes gets a potential score lift. The model
 * is deliberately conservative: the target is the median of the
 * non-bottlenecked dimensions, not 100.
 *
 * @param failureModeReport - Failure modes classified in Phase 3a
 * @param scores - Feature scores, one entry per area
 * @param weights - Optional dimension weights (rubrics.yaml weights are
 *   used when omitted)
 * @returns Gap estimates ordered by priority, highest first
 */
export declare function estimateImpact(failureModeReport: FailureModeReport, scores: FeatureScore[], weights?: Record<string, number>): GapEstimate[];
41
/**
 * Render a gap analysis report as text for console output.
 *
 * @param report - The gap analysis report to render
 * @returns The formatted text
 */
export declare function formatGapAnalysisConsole(report: GapAnalysisReport): string;
45
/**
 * Render a gap analysis report as markdown for PR comments.
 *
 * @param report - The gap analysis report to render
 * @returns The formatted markdown
 */
export declare function formatGapAnalysisMarkdown(report: GapAnalysisReport): string;