@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530) hide show
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,238 @@
1
+ /**
2
+ * Interactive mode — guided wizard for the evaluation pipeline.
3
+ *
4
+ * When `ailf` is run with no arguments (or `ailf interactive`), this module
5
+ * prompts the user through mode selection, area scoping, debug options,
6
+ * and common flags — then builds and executes the equivalent `ailf pipeline`
7
+ * command.
8
+ *
9
+ * Uses @inquirer/prompts for a clean, modern terminal UI.
10
+ */
11
+ import { Command } from "commander";
12
/**
 * Build the `interactive` subcommand.
 *
 * The action runs the prompt wizard, echoes the equivalent `ailf` invocation,
 * and then executes that invocation in a child process with inherited stdio.
 * If the wizard yields no result the action returns without running anything.
 * When the child fails, this process exits with the child's exit status, or
 * with 1 when no numeric status is available.
 *
 * @returns a commander `Command` named "interactive"
 */
export function createInteractiveCommand() {
    return new Command("interactive")
        .description("Guided wizard for common evaluation workflows")
        .action(async () => {
        const result = await runInteractiveWizard();
        if (!result)
            return;
        console.log();
        console.log(` ▸ ailf ${result.command} ${result.args.join(" ")}`);
        console.log();
        // Re-run the current CLI entry point in a child process. The argv is
        // handed over as an array (no shell), so free-text answers such as
        // area lists or regex patterns are never interpreted by a shell.
        // Reusing process.execPath / execArgv / argv[1] re-invokes the CLI the
        // same way it was started instead of assuming `tsx src/cli.ts` exists
        // relative to the current working directory.
        const { execFileSync } = await import("child_process");
        const childArgv = [
            ...process.execArgv,
            process.argv[1],
            result.command,
            ...result.args,
        ];
        try {
            execFileSync(process.execPath, childArgv, { stdio: "inherit" });
        }
        catch (err) {
            // `status` is null when the child was killed by a signal or could
            // not be spawned; treat every non-numeric status as failure so
            // the wizard never reports success for a failed run.
            const status = err !== null && typeof err === "object" && "status" in err
                ? err.status
                : undefined;
            process.exit(typeof status === "number" ? status : 1);
        }
    });
}
38
+ // ---------------------------------------------------------------------------
39
+ // Wizard steps
40
+ // ---------------------------------------------------------------------------
41
/**
 * Walk the user through the interactive prompts and translate the answers
 * into a command name plus argument list.
 *
 * Every workflow except "pipeline" returns right after its own (optional)
 * follow-up prompt. Choosing the pipeline continues through the mode, area,
 * debug, dry-run, compare, and preview questions and accumulates flags in
 * the order the questions are asked.
 *
 * @returns `{ args, command }` describing the `ailf` invocation to run
 */
async function runInteractiveWizard() {
    const { confirm, input, select } = await import("@inquirer/prompts");
    console.log();
    console.log(" 🧙 AI Literacy Framework — Interactive Mode");
    console.log(" ────────────────────────────────────────────");
    console.log();
    // Step 1: pick the top-level workflow
    const workflowChoices = [
        {
            name: "Run pipeline",
            value: "pipeline",
            description: "Full evaluation pipeline (fetch → eval → score → report)",
        },
        {
            name: "Compare scores",
            value: "compare",
            description: "Compare current scores against a saved baseline",
        },
        {
            name: "Validate config",
            value: "validate",
            description: "Check YAML files, mappings, and reference solutions",
        },
        {
            name: "Manage baselines",
            value: "baseline",
            description: "Save, compare, or list historical score snapshots",
        },
        {
            name: "Weekly digest",
            value: "weekly-digest",
            description: "Weekly evaluation trends and area summaries",
        },
        {
            name: "Grader tools",
            value: "grader",
            description: "Measure grader reliability and discrimination power",
        },
    ];
    const workflow = await select({
        message: "What would you like to do?",
        choices: workflowChoices,
    });
    switch (workflow) {
        case "compare":
        case "validate":
            // No follow-up questions: the command runs without arguments.
            return { args: [], command: workflow };
        case "weekly-digest": {
            const previewOnly = await confirm({
                message: "Dry run? (preview to stdout, don't send to Slack)",
                default: true,
            });
            return {
                args: previewOnly ? ["--dry-run"] : [],
                command: "weekly-digest",
            };
        }
        case "baseline": {
            const baselineOp = await select({
                message: "Baseline operation:",
                choices: [
                    { name: "Save current scores", value: "save" },
                    { name: "Compare against latest", value: "compare" },
                    { name: "List saved baselines", value: "history" },
                ],
            });
            return { args: [baselineOp], command: "baseline" };
        }
        case "grader": {
            const graderTool = await select({
                message: "Grader tool:",
                choices: [
                    {
                        name: "Consistency analysis",
                        value: "consistency",
                        description: "Measure grading variance across replications",
                    },
                    {
                        name: "Sensitivity test",
                        value: "sensitivity",
                        description: "Test grader discrimination across quality levels",
                    },
                    {
                        name: "Compare graders",
                        value: "compare",
                        description: "Compare two grader models head-to-head",
                    },
                    {
                        name: "Validate grader",
                        value: "validate",
                        description: "Validate against human reference grades",
                    },
                ],
            });
            return { args: [graderTool], command: "grader" };
        }
        default:
            // Anything else continues into the pipeline questions below.
            break;
    }
    // --- Pipeline-specific questions ---
    const pipelineArgs = [];
    // Step 2: evaluation mode ("baseline" adds no --mode flag)
    const evalMode = await select({
        message: "Evaluation mode:",
        choices: [
            {
                name: "Baseline (with docs vs without docs)",
                value: "baseline",
                description: "Evaluate with pre-fetched documentation context",
            },
            {
                name: "Observed (instrumented)",
                value: "observed",
                description: "Baseline + record HTTP request patterns",
            },
            {
                name: "Agentic (agent-driven retrieval)",
                value: "agentic",
                description: "Agent searches for docs itself via web tools",
            },
        ],
    });
    if (evalMode !== "baseline") {
        pipelineArgs.push("--mode", evalMode);
    }
    // Step 3: optional area scoping (blank answers add nothing)
    const wantsAreaScope = await confirm({
        message: "Scope to specific feature areas?",
        default: false,
    });
    if (wantsAreaScope) {
        const areaAnswer = await input({
            message: "Feature areas (comma-separated, e.g. groq,frameworks):",
        });
        const areaList = areaAnswer.trim();
        if (areaList) {
            pipelineArgs.push("--area", areaList);
        }
    }
    // Step 4: optional debug mode and its scope
    const wantsDebug = await confirm({
        message: "Enable debug mode? (run a subset of tests for fast feedback)",
        default: false,
    });
    if (wantsDebug) {
        pipelineArgs.push("--debug");
        const debugScope = await select({
            message: "Debug scope:",
            choices: [
                { name: "First 2 tests (default)", value: "default" },
                { name: "First N tests", value: "first-n" },
                { name: "Random sample", value: "sample" },
                { name: "Filter by pattern", value: "pattern" },
            ],
        });
        switch (debugScope) {
            case "first-n": {
                const count = await input({ message: "Number of tests:", default: "5" });
                pipelineArgs.push("--debug-n", count);
                break;
            }
            case "sample": {
                const sampleSize = await input({ message: "Sample size:", default: "3" });
                pipelineArgs.push("--debug-sample", sampleSize);
                break;
            }
            case "pattern": {
                const patternAnswer = await input({
                    message: "Description regex (e.g. Blog, webhook):",
                });
                const trimmedPattern = patternAnswer.trim();
                if (trimmedPattern) {
                    pipelineArgs.push("--debug-pattern", trimmedPattern);
                }
                break;
            }
            default:
                // "default" scope: --debug alone is enough.
                break;
        }
    }
    // Step 5: dry run; the compare and preview questions only apply to real runs
    const isDryRun = await confirm({
        message: "Dry run? (validate config only, no API calls)",
        default: false,
    });
    if (isDryRun) {
        pipelineArgs.push("--dry-run");
    }
    else {
        const compareAfterwards = await confirm({
            message: "Compare against baseline after evaluation?",
            default: false,
        });
        if (compareAfterwards) {
            pipelineArgs.push("--compare");
        }
        // Step 6: optionally preview the execution plan before running
        const showPlan = await confirm({
            message: "Preview execution plan before running? (--explain --yes)",
            default: true,
        });
        if (showPlan) {
            pipelineArgs.push("--explain", "--yes");
        }
    }
    return { args: pipelineArgs, command: "pipeline" };
}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * lookup-doc command — search Sanity for documentation articles by keyword.
3
+ *
4
+ * Helps external contributors find the correct `slug` for canonicalDocs
5
+ * references without needing to browse the CMS or guess from URLs.
6
+ *
7
+ * Usage:
8
+ * ailf lookup-doc webhooks
9
+ * ailf lookup-doc "visual editing"
10
+ * ailf lookup-doc groq --limit 20
11
+ *
12
+ * @see docs/design-docs/canonical-doc-resolution.md
13
+ */
14
+ import { Command } from "commander";
15
/**
 * Creates the commander `Command` for `ailf lookup-doc`.
 *
 * @returns the configured `Command` instance
 */
export declare function createLookupDocCommand(): Command;
@@ -0,0 +1,84 @@
1
+ /**
2
+ * lookup-doc command — search Sanity for documentation articles by keyword.
3
+ *
4
+ * Helps external contributors find the correct `slug` for canonicalDocs
5
+ * references without needing to browse the CMS or guess from URLs.
6
+ *
7
+ * Usage:
8
+ * ailf lookup-doc webhooks
9
+ * ailf lookup-doc "visual editing"
10
+ * ailf lookup-doc groq --limit 20
11
+ *
12
+ * @see docs/design-docs/canonical-doc-resolution.md
13
+ */
14
+ import { Command } from "commander";
15
/**
 * Build the `lookup-doc` subcommand.
 *
 * The action searches published `article` documents whose title or slug
 * matches the keyword, prints the matches in an aligned list, and finishes
 * with a ready-to-paste `canonicalDocs` snippet for the first usable result.
 *
 * Options:
 *  - `-l, --limit <n>`     maximum number of results (positive integer, default 10)
 *  - `-s, --source <name>` documentation source whose `dataset` / `projectId`
 *                          override the default Sanity client settings
 *
 * Exits with code 1 when the limit is invalid or the source cannot be loaded,
 * and with code 0 when the search returns no results.
 *
 * @returns a commander `Command` named "lookup-doc"
 */
export function createLookupDocCommand() {
    return new Command("lookup-doc")
        .description("Search Sanity docs by keyword — find slugs for canonicalDocs references")
        .argument("<keyword>", "Search keyword (matches title and slug)")
        // commander calls the parser as (value, previous); a bare `parseInt`
        // would take `previous` as its radix, so pin the radix explicitly.
        .option("-l, --limit <n>", "Maximum results to show", (value) => Number.parseInt(value, 10), 10)
        .option("-s, --source <name>", "Documentation source (from sources.yaml)")
        .action(async (keyword, opts) => {
        // Reject NaN, zero, and negative limits before they reach the query slice.
        if (!Number.isInteger(opts.limit) || opts.limit < 1) {
            console.error("❌ --limit must be a positive integer");
            process.exit(1);
        }
        const { getSanityClient } = await import("../sanity/client.js");
        const { loadSource } = await import("../sources.js");
        // Resolve source if provided
        let clientOverrides;
        if (opts.source) {
            try {
                const source = loadSource(opts.source);
                clientOverrides = {};
                if (source.dataset)
                    clientOverrides.dataset = source.dataset;
                if (source.projectId)
                    clientOverrides.projectId = source.projectId;
            }
            catch (err) {
                const msg = err instanceof Error ? err.message : String(err);
                console.error(`❌ Failed to load source "${opts.source}": ${msg}`);
                process.exit(1);
            }
        }
        const client = getSanityClient(clientOverrides);
        console.log(`\nSearching for "${keyword}"...\n`);
        // The keyword travels as a query parameter rather than being spliced
        // into the GROQ text, so quotes or backslashes in user input cannot
        // break or alter the query.
        const query = `*[_type == "article"
  && !(_id in path("drafts.**"))
  && (
    title match $pattern
    || slug.current match $pattern
  )
] | order(title asc) [0...$limit] {
  title,
  "slug": slug.current,
  "section": primarySection->title,
  "sectionSlug": primarySection->slug.current
}`;
        const results = await client.fetch(query, {
            limit: opts.limit,
            pattern: `*${keyword}*`,
        });
        if (results.length === 0) {
            console.log(` No articles found matching "${keyword}".\n\n` +
                " Tips:\n" +
                " - Try a shorter or more general keyword\n" +
                " - Use partial matches (e.g., 'webhook' instead of 'webhooks')\n");
            process.exit(0);
        }
        console.log(` Found ${results.length} article${results.length === 1 ? "" : "s"}:\n`);
        // An article matched by title alone may have no slug; show a
        // placeholder instead of crashing on `null.length`.
        const slugLabel = (doc) => doc.slug ?? "(no slug)";
        // Find longest slug for alignment (capped at 40 columns)
        const maxSlugLen = Math.min(40, Math.max(...results.map((r) => slugLabel(r).length)));
        for (const doc of results) {
            const section = doc.section ?? "Unknown";
            const paddedSlug = slugLabel(doc).padEnd(maxSlugLen);
            console.log(` slug: ${paddedSlug} │ ${doc.title}`);
            console.log(` ${"".padEnd(maxSlugLen + 6)} │ Section: ${section}\n`);
        }
        // Build the usage hint from the first result that has a slug.
        const example = results.find((r) => r.slug);
        if (example) {
            console.log(" Usage in .ailf/tasks/*.yaml:\n");
            console.log(" canonicalDocs:");
            console.log(` - slug: ${example.slug}`);
            console.log(` reason: "${example.title}"`);
            if (example.sectionSlug) {
                console.log(`\n Or use path: ${example.sectionSlug}/${example.slug}`);
            }
        }
        console.log();
    });
}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * measure-retrieval command — evaluate Sanity text search retrieval quality.
3
+ */
4
+ import { Command } from "commander";
5
/**
 * Creates the commander `Command` for `ailf measure-retrieval`.
 *
 * @returns the configured `Command` instance
 */
export declare function createMeasureRetrievalCommand(): Command;
@@ -0,0 +1,65 @@
1
+ /**
2
+ * measure-retrieval command — evaluate Sanity text search retrieval quality.
3
+ */
4
+ import { mkdirSync, writeFileSync } from "fs";
5
+ import { dirname, join, resolve } from "path";
6
+ import { fileURLToPath } from "url";
7
+ import { Command } from "commander";
8
+ import { getSanityClient } from "../sanity/client.js";
9
+ import { formatRetrievalTable, measureRetrieval, } from "../pipeline/measure-retrieval.js";
10
+ const __dirname = dirname(fileURLToPath(import.meta.url));
11
+ const ROOT = resolve(__dirname, "..", "..");
12
+ // ---------------------------------------------------------------------------
13
+ // Sanity text search retriever
14
+ // ---------------------------------------------------------------------------
15
/**
 * Retriever backed by a GROQ `score()` query over published articles.
 *
 * Title matches are boosted by 3 and matches in the plain text of `content`
 * by 1; results are ordered by descending score and cut to the first `k`.
 *
 * @param query - search text bound to the `$query` parameter
 * @param k - maximum number of results to return (default 10)
 * @returns the `slug.current` value of each hit, best score first
 */
async function retrieveDocsForQuery(query, k = 10) {
    const scoredArticlesQuery = `
    *[_type == "article" && !(_id in path("drafts.**"))]
    | score(
      boost(title match $query, 3),
      boost(pt::text(content) match $query, 1)
    )
    | order(_score desc)
    [0...$k] {
      "slug": slug.current,
      _score
    }
  `;
    const hits = await getSanityClient().fetch(scoredArticlesQuery, { k, query });
    // Only the slug is needed by callers; the score is used for ordering only.
    const slugs = [];
    for (const hit of hits) {
        slugs.push(hit.slug);
    }
    return slugs;
}
31
+ // ---------------------------------------------------------------------------
32
+ // Command factory
33
+ // ---------------------------------------------------------------------------
34
/**
 * Build the `measure-retrieval` subcommand.
 *
 * The action runs `measureRetrieval` with the Sanity text-search retriever,
 * prints Recall@5 / Recall@10 / NDCG@10 per task as results arrive, prints
 * the summary table, and writes the summary as JSON to
 * `<ROOT>/results/latest/retrieval-results.json`.
 *
 * Any failure sets `process.exitCode` to 1 and reports the error on stderr.
 *
 * @returns a commander `Command` named "measure-retrieval"
 */
export function createMeasureRetrievalCommand() {
    return new Command("measure-retrieval")
        .description("Measure retrieval quality against canonical document annotations")
        .action(async () => {
        try {
            console.log("=== Sanity AI Literacy — Retrieval Quality Measurement ===\n");
            const summary = await measureRetrieval({
                onProgress: (_area, taskId, result) => {
                    // Metrics are multiplied by 100 and shown as percentages.
                    console.log(` ${taskId}:`);
                    console.log(` Recall@5: ${(result.recall_at_5 * 100).toFixed(1)}%`);
                    console.log(` Recall@10: ${(result.recall_at_10 * 100).toFixed(1)}%`);
                    console.log(` NDCG@10: ${(result.ndcg_at_10 * 100).toFixed(1)}%`);
                },
                retriever: retrieveDocsForQuery,
                rootDir: ROOT,
            });
            // Print summary
            console.log();
            console.log(formatRetrievalTable(summary));
            // Persist results
            const outDir = join(ROOT, "results", "latest");
            mkdirSync(outDir, { recursive: true });
            writeFileSync(join(outDir, "retrieval-results.json"), JSON.stringify(summary, null, 2));
            console.log("\nResults written to results/latest/retrieval-results.json");
        }
        catch (err) {
            process.exitCode = 1;
            // Report every failure, including thrown non-Error values, so a
            // non-zero exit is never silent.
            console.error(err instanceof Error ? err.message : String(err));
        }
    });
}
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Pipeline action — resolves CLI options and orchestrates pipeline steps.
3
+ *
4
+ * This file is the thin CLI-to-orchestrator bridge. The bulk of execution
5
+ * logic lives in packages/eval/src/orchestration/.
6
+ *
7
+ * Responsibilities:
8
+ * - Resolve CLI flags into typed ResolvedOptions
9
+ * - Delegate to the PipelineOrchestrator for step execution
10
+ *
11
+ * @see packages/eval/src/orchestration/ for the step-based pipeline
12
+ */
13
+ import { type ImpactSummary } from "../pipeline/reverse-mapping.js";
14
+ import type { DebugOptions, EvalMode } from "../pipeline/types.js";
15
+ import type { PipelineCliOptions } from "./pipeline.js";
16
/**
 * Typed option set computed from the pipeline CLI flags
 * (the return type of `computeResolvedOptions`).
 *
 * Properties declared with `?` may be absent; the boolean switches and the
 * `*Args` string arrays are always present.
 */
export interface ResolvedOptions {
    allowedOriginArgs: string[];
    areaOption?: string;
    beforeOption?: string;
    changedDocsOption?: string;
    compareBaseline?: string;
    compareEnabled: boolean;
    compareThreshold?: number;
    concurrency?: number;
    datasetOverride?: string;
    debug?: DebugOptions;
    discoveryReportEnabled: boolean;
    dryRun: boolean;
    gapAnalysisEnabled: boolean;
    graderReplications?: number;
    headerArgs: string[];
    impactSummary?: ImpactSummary;
    mode: EvalMode;
    noCache: boolean;
    noRemoteCache: boolean;
    outputPath?: string;
    perspectiveOverride?: string;
    projectIdOverride?: string;
    promptfooUrl?: string;
    publishEnabled: boolean;
    publishTag?: string;
    readinessEnabled: boolean;
    reportDataset?: string;
    reportProjectId?: string;
    sanityDocumentArgs: string[];
    searchMode: string;
    skipEval: boolean;
    skipFetch: boolean;
    source?: string;
    studioOriginOverride?: string;
    repoTasksPath?: string;
    taskOption?: string;
    // Only these two task sources are representable in the type.
    taskSourceType?: "content-lake" | "yaml";
    urlArgs: string[];
}
56
/**
 * Pure option resolution — computes ResolvedOptions from CLI flags without
 * any side effects. Safe to call from --explain without mutating process.env.
 *
 * Exported so the plan builder can call it independently.
 *
 * @param opts - raw pipeline CLI options (`PipelineCliOptions`)
 * @returns the resolved, typed option set
 */
export declare function computeResolvedOptions(opts: PipelineCliOptions): ResolvedOptions;
63
/**
 * Execute the evaluation pipeline.
 *
 * 1. Resolve CLI options into typed ResolvedOptions
 * 2. Build AppContext (composition root wires adapters)
 * 3. Build step sequence from context
 * 4. Delegate to the PipelineOrchestrator
 *
 * @param cliOpts - raw pipeline CLI options (`PipelineCliOptions`)
 * @returns a promise that carries no result value
 */
export declare function executePipeline(cliOpts: PipelineCliOptions): Promise<void>;