@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530)
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,253 @@
1
+ /**
2
+ * publish command — manually publish a local evaluation report to the
3
+ * Sanity Content Lake without re-running the pipeline.
4
+ *
5
+ * Reads a score-summary.json (defaulting to results/latest/score-summary.json),
6
+ * builds provenance, writes the report to Sanity, and fans out to configured
7
+ * sinks — exactly the same as the publish step in `ailf pipeline`, but
8
+ * standalone.
9
+ *
10
+ * Uses createAppContext() (composition root) for all infrastructure access.
11
+ *
12
+ * @example
13
+ * ailf publish # default path
14
+ * ailf publish ./my-results/score-summary.json # custom path
15
+ * ailf publish --tag "manual-2026-03-13" # with a label
16
+ * ailf publish --dry-run # preview without writing
17
+ *
18
+ * @see packages/eval/src/composition-root.ts
19
+ * @see docs/design-docs/report-store/architecture.md
20
+ */
21
+ import { existsSync, readFileSync } from "fs";
22
+ import { dirname, resolve } from "path";
23
+ import { fileURLToPath } from "url";
24
+ import { Command } from "commander";
25
+ import { createAppContext } from "../composition-root.js";
26
+ import { buildProvenance, } from "../pipeline/provenance.js";
27
+ import { generateReportId } from "../report-store.js";
28
+ import { withRetry } from "../sinks/retry.js";
29
+ const __dirname = dirname(fileURLToPath(import.meta.url));
30
+ const ROOT = resolve(__dirname, "..", "..");
31
+ const DEFAULT_SUMMARY_PATH = resolve(ROOT, "results", "latest", "score-summary.json");
32
/**
 * Build the `publish` subcommand.
 *
 * Registers an optional `[summary-path]` argument (defaulting to
 * DEFAULT_SUMMARY_PATH) together with the `--tag` and `--dry-run` options,
 * and forwards the parsed values to runPublishCommand().
 *
 * @returns The configured Commander command.
 */
export function createPublishCommand() {
    const publish = new Command("publish");
    publish.description("Publish a local evaluation report to the Sanity Content Lake");
    publish.argument("[summary-path]", "Path to score-summary.json", DEFAULT_SUMMARY_PATH);
    publish.option("-t, --tag <tag>", "Label for the published report");
    publish.option("-n, --dry-run", "Preview the report without writing to Sanity or sinks", false);
    // The action resolves only once the publish run has finished.
    return publish.action(async (summaryPath, opts) => {
        await runPublishCommand(summaryPath, opts);
    });
}
42
+ // ---------------------------------------------------------------------------
43
+ // Provenance builder (from score summary, not full pipeline context)
44
+ // ---------------------------------------------------------------------------
45
/**
 * Build a ProvenanceInput from a ScoreSummary for manual publish.
 *
 * When publishing outside the pipeline, we reconstruct what we can from
 * the summary metadata and environment. Some fields (contextHash,
 * promptfooUrl) are not available for manual publishes.
 *
 * Each source field is taken from `summary.source` when present, then from
 * the matching environment variable where one is consulted
 * (SANITY_DATASET, SANITY_PERSPECTIVE, SANITY_PROJECT_ID), then from a
 * hard-coded default. The evaluation mode comes from EVAL_MODE and falls
 * back to "baseline".
 *
 * @param summary Parsed score summary; must expose a `scores` array whose
 *   entries carry a `feature` name. `summary.source` is optional.
 * @returns An object with `areas`, `mode`, `rootDir` and `source`.
 */
function buildProvenanceFromSummary(summary) {
    const summarySource = summary.source;
    // Resolve the base URL once so `baseUrl` and `llmsTxt` can never disagree.
    const baseUrl = summarySource?.baseUrl ?? "https://www.sanity.io/docs";
    const source = {
        baseUrl,
        dataset: summarySource?.dataset ?? process.env.SANITY_DATASET ?? "next",
        documentIds: [],
        // Drop trailing slashes first: a baseUrl such as ".../docs/" would
        // otherwise produce ".../docs//llms.txt".
        llmsTxt: `${String(baseUrl).replace(/\/+$/, "")}/llms.txt`,
        name: summarySource?.name ?? "production",
        perspective: summarySource?.perspective ?? process.env.SANITY_PERSPECTIVE,
        priorityDomain: "sanity.io",
        projectId: summarySource?.projectId ?? process.env.SANITY_PROJECT_ID ?? "3do82whm",
        studioOrigin: "https://admin.sanity.io",
        urls: [],
    };
    return {
        areas: summary.scores.map((score) => score.feature),
        mode: process.env.EVAL_MODE ?? "baseline",
        rootDir: ROOT,
        source,
    };
}
76
+ // ---------------------------------------------------------------------------
77
+ // Command implementation
78
+ // ---------------------------------------------------------------------------
79
/**
 * Publish a score summary stored on disk as a report.
 *
 * Steps: read and validate the summary file, build provenance, optionally
 * auto-compare against a stored baseline, then either print a preview
 * (`--dry-run`) or write the report to the report store and deliver it to
 * every configured sink.
 *
 * The process exits with code 1 when the file is missing, unparsable or
 * structurally invalid, when no report store is available for a live
 * publish, or when the store write does not return a result.
 *
 * @param summaryPath Path to the score-summary.json to publish.
 * @param opts Parsed CLI options; `opts.tag` and `opts.dryRun` are read.
 */
async function runPublishCommand(summaryPath, opts) {
    // Wire up infrastructure via composition root
    const ctx = createAppContext({
        compareEnabled: false,
        discoveryReportEnabled: false,
        gapAnalysisEnabled: false,
        mode: "baseline",
        noCache: true,
        noRemoteCache: true,
        publishEnabled: true,
        publishTag: opts.tag,
        readinessEnabled: false,
        rootDir: ROOT,
        searchMode: "open",
        skipEval: true,
        skipFetch: true,
    });
    const store = ctx.reportStore;
    const sinks = (ctx.sinks ?? []);
    console.log();
    console.log("=== Publish Report ===");
    console.log();
    // -----------------------------------------------------------------------
    // 1. Resolve and read the score summary
    // -----------------------------------------------------------------------
    const resolvedPath = resolve(summaryPath);
    if (!existsSync(resolvedPath)) {
        console.error(` ✖ File not found: ${resolvedPath}`);
        console.error();
        console.error(" Hint: Run `ailf pipeline` first to generate results,");
        console.error(" or provide a path to an existing score-summary.json.");
        process.exit(1);
    }
    let summary;
    try {
        summary = JSON.parse(readFileSync(resolvedPath, "utf-8"));
    }
    catch (err) {
        console.error(` ✖ Failed to parse ${resolvedPath}: ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
    }
    // Basic validation — a ScoreSummary must be an object with a scores array.
    // The null/object check matters because valid JSON such as `null` or `42`
    // would otherwise fail with a raw TypeError on property access.
    if (summary === null ||
        typeof summary !== "object" ||
        !Array.isArray(summary.scores)) {
        console.error(" ✖ Invalid score-summary.json: missing `scores` array.");
        process.exit(1);
    }
    // The overall score is formatted with toFixed() below, so it must be a number.
    if (typeof summary.overall?.avgScore !== "number") {
        console.error(" ✖ Invalid score-summary.json: missing numeric `overall.avgScore`.");
        process.exit(1);
    }
    const areaCount = summary.scores.length;
    const overallScore = summary.overall.avgScore;
    console.log(` File: ${resolvedPath}`);
    console.log(` Areas: ${areaCount} (${summary.scores.map((s) => s.feature).join(", ")})`);
    console.log(` Overall: ${overallScore.toFixed(1)}`);
    if (opts.tag) {
        console.log(` Tag: ${opts.tag}`);
    }
    console.log(` Mode: ${opts.dryRun ? "dry run (no writes)" : "live"}`);
    console.log();
    // -----------------------------------------------------------------------
    // 2. Build provenance
    // -----------------------------------------------------------------------
    const provenanceInput = buildProvenanceFromSummary(summary);
    const provenance = buildProvenance(provenanceInput);
    // -----------------------------------------------------------------------
    // 3. Create report
    // -----------------------------------------------------------------------
    const now = new Date().toISOString();
    if (!store) {
        console.warn(" ⚠️ No Sanity API token found. Set AILF_REPORT_SANITY_API_TOKEN");
        console.warn(" or SANITY_API_TOKEN in your .env file.");
        // A dry run can still preview the report without a store.
        if (!opts.dryRun) {
            console.warn(" Cannot publish without a token. Exiting.");
            process.exit(1);
        }
    }
    // Auto-compare against most recent comparable baseline
    // (skipped for dry runs and when no store is available).
    const comparison = opts.dryRun || !store
        ? null
        : await store.autoCompare(summary, provenance, now);
    const reportId = generateReportId();
    const report = {
        comparison: comparison ?? undefined,
        completedAt: now,
        durationMs: 0, // manual publish — no pipeline duration
        id: reportId,
        provenance,
        summary,
        tag: opts.tag,
    };
    // -----------------------------------------------------------------------
    // 4. Dry run — print preview and exit
    // -----------------------------------------------------------------------
    if (opts.dryRun) {
        console.log(" --- Report Preview ---");
        console.log(` ID: ${reportId}`);
        console.log(` Completed: ${now}`);
        console.log(` Mode: ${provenance.mode}`);
        console.log(` Source: ${provenance.source.name}`);
        console.log(` Models: ${provenance.models.map((m) => m.id).join(", ")}`);
        console.log(` Grader: ${provenance.graderModel}`);
        console.log(` Trigger: ${provenance.trigger.type}`);
        if (opts.tag) {
            console.log(` Tag: ${opts.tag}`);
        }
        console.log();
        if (sinks.length > 0) {
            console.log(` Sinks: ${sinks.map((s) => s.name).join(", ")}`);
        }
        else {
            console.log(" Sinks: none configured");
        }
        console.log();
        console.log(" ✔ Dry run complete. No data was written.");
        return;
    }
    // -----------------------------------------------------------------------
    // 5. Write to Sanity (system of record)
    // -----------------------------------------------------------------------
    console.log(" Writing to Sanity Content Lake...");
    const sanityResult = store ? await store.write(report) : null;
    if (sanityResult) {
        console.log(` ✅ Report written: ${sanityResult}`);
    }
    else {
        console.warn(" ⚠️ Sanity write failed (see warnings above)");
    }
    // -----------------------------------------------------------------------
    // 6. Auto-comparison result
    // -----------------------------------------------------------------------
    if (comparison) {
        const delta = comparison.deltas.overall;
        const sign = delta >= 0 ? "+" : "";
        console.log(` 📊 Auto-compared vs baseline: ${sign}${delta.toFixed(1)}`);
    }
    // -----------------------------------------------------------------------
    // 7. Fan out to sinks
    // -----------------------------------------------------------------------
    if (sinks.length > 0) {
        console.log();
        console.log(` Delivering to ${sinks.length} sink(s)...`);
        // allSettled: one sink throwing must not hide the outcome of the others.
        const settled = await Promise.allSettled(sinks.map(async (sink) => {
            const result = await withRetry(() => sink.publish(report));
            return { name: sink.name, result };
        }));
        for (const outcome of settled) {
            if (outcome.status === "fulfilled") {
                const { name, result } = outcome.value;
                if (result.status === "failed") {
                    console.warn(` ⚠️ Sink ${name} failed: ${result.error}`);
                }
                else if (result.status === "skipped") {
                    console.log(` ⏭️ Sink ${name} skipped: ${result.reason}`);
                }
                else {
                    console.log(` ✅ Sink ${name} delivered${result.detail ? ` (${result.detail})` : ""}`);
                }
            }
            else {
                const error = outcome.reason instanceof Error
                    ? outcome.reason.message
                    : String(outcome.reason);
                console.warn(` ⚠️ Sink delivery error: ${error}`);
            }
        }
    }
    // -----------------------------------------------------------------------
    // 8. Summary
    // -----------------------------------------------------------------------
    console.log();
    if (sanityResult) {
        console.log(` ✔ Published report ${reportId}`);
    }
    else {
        // Sinks may have been notified, but the system of record was not updated.
        console.log(" ✖ Report was not persisted to Sanity.");
        process.exit(1);
    }
}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * readiness-report command — generates a launch readiness checklist
3
+ * for a given feature area.
4
+ *
5
+ * Combines threshold evaluation, ceiling decomposition, and gap analysis
6
+ * into a single actionable report.
7
+ *
8
+ */
9
+ import { Command } from "commander";
10
+ export declare function createReadinessReportCommand(): Command;
@@ -0,0 +1,104 @@
1
+ /**
2
+ * readiness-report command — generates a launch readiness checklist
3
+ * for a given feature area.
4
+ *
5
+ * Combines threshold evaluation, ceiling decomposition, and gap analysis
6
+ * into a single actionable report.
7
+ *
8
+ */
9
+ import { Command } from "commander";
10
+ import { existsSync, readFileSync, readdirSync, writeFileSync } from "fs";
11
+ import { dirname, join, resolve } from "path";
12
+ import { fileURLToPath } from "url";
13
+ import { load } from "js-yaml";
14
+ import { formatReadinessMarkdown, generateReadinessReport, } from "../pipeline/readiness-report.js";
15
+ import { ThresholdConfigSchema, } from "../pipeline/schemas.js";
16
+ const __dirname = dirname(fileURLToPath(import.meta.url));
17
+ const ROOT = resolve(__dirname, "..", "..");
18
+ const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
19
+ const GAP_ANALYSIS_PATH = join(ROOT, "results", "latest", "gap-analysis.json");
20
+ const THRESHOLDS_PATH = join(ROOT, "config", "thresholds.yaml");
21
+ const BASELINES_DIR = join(ROOT, "results", "baselines");
22
/**
 * Read and parse a JSON file, printing a readable error and exiting with
 * code 1 when the file cannot be read or does not contain valid JSON.
 *
 * @param filePath Absolute path of the file to read.
 * @param label Human-readable name of the file, used in the error message.
 * @returns The parsed JSON value.
 */
function readJsonOrExit(filePath, label) {
    try {
        return JSON.parse(readFileSync(filePath, "utf-8"));
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`❌ Failed to read ${label} at ${filePath}: ${reason}`);
        process.exit(1);
    }
}
/**
 * Build the `readiness-report` subcommand.
 *
 * The action loads the latest score summary and the threshold config (both
 * required), the gap analysis (optional) and, with `--history`, one data
 * point per baseline file that contains the requested area. It then
 * generates the report, writes the markdown to `--output` or stdout, and
 * exits with code 1 when the report does not pass.
 *
 * @returns The configured Commander command.
 */
export function createReadinessReportCommand() {
    return new Command("readiness-report")
        .description("Generate launch readiness report for a feature area")
        .requiredOption("-a, --area <area>", "Feature area to evaluate (required)")
        .option("-H, --history", "Include historical progress from baselines", false)
        .option("-o, --output <path>", "Write markdown to file instead of stdout")
        .action(async (opts) => {
        // Load score summary
        if (!existsSync(SCORE_SUMMARY_PATH)) {
            console.error(`❌ Score summary not found at ${SCORE_SUMMARY_PATH}. Run \`pnpm pipeline\` first.`);
            process.exit(1);
        }
        const scoreSummary = readJsonOrExit(SCORE_SUMMARY_PATH, "score summary");
        // Load threshold config
        if (!existsSync(THRESHOLDS_PATH)) {
            console.error(`❌ Threshold config not found at ${THRESHOLDS_PATH}.`);
            process.exit(1);
        }
        let parsedThresholds;
        try {
            parsedThresholds = load(readFileSync(THRESHOLDS_PATH, "utf-8"));
        }
        catch (err) {
            // YAML syntax errors are reported like schema errors below
            // instead of escaping as an unhandled rejection.
            const reason = err instanceof Error ? err.message : String(err);
            console.error(`❌ Failed to parse ${THRESHOLDS_PATH}: ${reason}`);
            process.exit(1);
        }
        const thresholdResult = ThresholdConfigSchema.safeParse(parsedThresholds);
        if (!thresholdResult.success) {
            const messages = thresholdResult.error.issues
                .map((i) => ` ${i.path.join(".")}: ${i.message}`)
                .join("\n");
            console.error(`❌ Invalid thresholds.yaml:\n${messages}`);
            process.exit(1);
        }
        const thresholdConfig = thresholdResult.data;
        // Load gap analysis (optional). A missing file is fine; a corrupt one
        // is an error, so the report is never produced from partial input.
        let gapAnalysis;
        if (existsSync(GAP_ANALYSIS_PATH)) {
            gapAnalysis = readJsonOrExit(GAP_ANALYSIS_PATH, "gap analysis");
        }
        const history = [];
        if (opts.history && existsSync(BASELINES_DIR)) {
            // Lexicographic sort of the file names fixes the order of the history.
            const files = readdirSync(BASELINES_DIR)
                .filter((f) => f.endsWith(".json"))
                .sort();
            for (const file of files) {
                try {
                    const raw = readFileSync(join(BASELINES_DIR, file), "utf-8");
                    const data = JSON.parse(raw);
                    const areaScore = data.scores?.find((s) => s.feature === opts.area);
                    if (!areaScore)
                        continue;
                    const nameWithoutExt = file.replace(/\.json$/, "");
                    // Everything after the fourth "_"-separated segment is the tag.
                    // NOTE(review): the meaning of the first four segments is not
                    // visible here — confirm against the baseline writer.
                    const parts = nameWithoutExt.split("_");
                    const tag = parts.length > 4 ? parts.slice(4).join("_") : undefined;
                    history.push({
                        score: areaScore.totalScore,
                        tag,
                        timestamp: data.timestamp ?? nameWithoutExt,
                    });
                }
                catch {
                    // Skip malformed baseline files: history is best-effort and
                    // one bad file should not block the report.
                }
            }
        }
        // Generate report
        const report = generateReadinessReport({
            area: opts.area,
            gapAnalysis,
            history,
            scoreSummary,
            thresholdConfig,
        });
        // Format and output
        const markdown = formatReadinessMarkdown(report);
        if (opts.output) {
            writeFileSync(opts.output, markdown, "utf-8");
            // Status goes to stderr so stdout stays clean for piping.
            console.error(`✅ Readiness report written to ${opts.output}`);
        }
        else {
            console.log(markdown);
        }
        // Exit with non-zero if not ready
        if (!report.pass) {
            process.exit(1);
        }
    });
}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Shared option groups for the AILF CLI.
3
+ *
4
+ * Each function adds a group of related options to a Commander command.
5
+ * This eliminates copy-paste across commands that share common flags
6
+ * (e.g., debug options, Sanity source options, output options).
7
+ */
8
+ import type { Command } from "commander";
9
+ /**
10
+ * Add agentic options: --url, --header, --allowed-origin, --search
11
+ */
12
+ export declare function addAgenticOptions(cmd: Command): Command;
13
+ /**
14
+ * Add debug options: --debug, --debug-n, --debug-pattern, --debug-sample
15
+ */
16
+ export declare function addDebugOptions(cmd: Command): Command;
17
+ /**
18
+ * Add output options: --output, --format
19
+ */
20
+ export declare function addOutputOptions(cmd: Command): Command;
21
+ /**
22
+ * Add Sanity source options: --sanity-dataset, --sanity-project, etc.
23
+ */
24
+ export declare function addSanitySourceOptions(cmd: Command): Command;
25
+ /**
26
+ * Collect repeatable string options into an array.
27
+ * Used as a Commander argParser for options like --url, --header, --allowed-origin.
28
+ */
29
+ export declare function collect(value: string, previous: string[]): string[];
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Shared option groups for the AILF CLI.
3
+ *
4
+ * Each function adds a group of related options to a Commander command.
5
+ * This eliminates copy-paste across commands that share common flags
6
+ * (e.g., debug options, Sanity source options, output options).
7
+ */
8
/**
 * Register the agentic flags on a command: the repeatable --url, --header
 * and --allowed-origin options (each with a plural alias) plus -S/--search.
 *
 * Repeatable options accumulate their values through `collect`, and each
 * one starts from its own empty array.
 *
 * @param cmd Commander command to extend.
 * @returns The value returned by the last `.option()` call.
 */
export function addAgenticOptions(cmd) {
    const repeatableFlags = [
        ["--url <url>", "Documentation URL (repeatable)"],
        ["--urls <url>", "Alias for --url (repeatable)"],
        ["--header <header>", 'Custom HTTP header "Key: Value" (repeatable)'],
        ["--headers <header>", "Alias for --header (repeatable)"],
        ["--allowed-origin <origin>", "Agent origin sandbox (repeatable, supports globs)"],
        ["--allowed-origins <origin>", "Alias for --allowed-origin (repeatable)"],
    ];
    let configured = cmd;
    for (const [flags, description] of repeatableFlags) {
        configured = configured.option(flags, description, collect, []);
    }
    return configured.option("-S, --search <mode>", "Web search mode: open, origin-only, off");
}
21
/**
 * Add debug options: --debug, --debug-n, --debug-pattern, --debug-sample
 *
 * The numeric options (--debug-n, --debug-sample) are parsed as base-10
 * integers.
 *
 * @param cmd Commander command to extend.
 * @returns The value returned by the last `.option()` call.
 */
export function addDebugOptions(cmd) {
    // Commander calls an option parser as (value, previous). Handing it
    // `parseInt` directly makes `previous` the radix, so a repeated flag
    // (`--debug-n 5 --debug-n 10`) would be parsed in base 5. Wrap it so only
    // the value is forwarded and the radix is always 10.
    const parseCount = (value) => Number.parseInt(value, 10);
    return cmd
        .option("-d, --debug", "Run subset of tests for fast feedback", false)
        .option("--debug-n <n>", "First N tests", parseCount)
        .option("--debug-pattern <regex>", "Filter tests by description regex")
        .option("--debug-sample <n>", "Random sample of N tests", parseCount);
}
31
/**
 * Register the output flags on a command: -o/--output and -f/--format.
 *
 * @param cmd Commander command to extend.
 * @returns The value returned by the last `.option()` call.
 */
export function addOutputOptions(cmd) {
    const withOutputPath = cmd.option("-o, --output <path>", "Write output to a specific file path");
    return withOutputPath.option("-f, --format <fmt>", "Output format (e.g., table, json, md)");
}
39
/**
 * Register the Sanity source flags on a command: dataset, project,
 * perspective and studio-origin overrides, followed by the repeatable
 * --sanity-document option and its plural alias.
 *
 * @param cmd Commander command to extend.
 * @returns The value returned by the last `.option()` call.
 */
export function addSanitySourceOptions(cmd) {
    const singleValueFlags = [
        ["--sanity-dataset <name>", "Override Sanity dataset"],
        ["--sanity-project <id>", "Override Sanity project ID"],
        ["--sanity-perspective <id>", "Sanity release perspective ID"],
        ["--sanity-studio-origin <url>", "Sanity Studio base URL"],
    ];
    const repeatableFlags = [
        ["--sanity-document <id>", "Evaluate specific Sanity document(s) (repeatable)"],
        ["--sanity-documents <id>", "Alias for --sanity-document (repeatable)"],
    ];
    let configured = cmd;
    for (const [flags, description] of singleValueFlags) {
        configured = configured.option(flags, description);
    }
    // Repeatable flags accumulate through `collect`, each from its own array.
    for (const [flags, description] of repeatableFlags) {
        configured = configured.option(flags, description, collect, []);
    }
    return configured;
}
51
/**
 * Accumulator for repeatable string options.
 *
 * Returns a new array holding every element of `previous` followed by
 * `value`; `previous` itself is left untouched. Passed as the parser for
 * options like --url, --header, --allowed-origin.
 *
 * @param value The value supplied for this occurrence of the option.
 * @param previous The values collected so far.
 * @returns A new array with `value` appended.
 */
export function collect(value, previous) {
    const collected = Array.from(previous);
    collected.push(value);
    return collected;
}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * update-quality-scores command — update QUALITY_SCORE.md from scores.
3
+ */
4
+ import { Command } from "commander";
5
+ export declare function createUpdateQualityScoresCommand(): Command;
@@ -0,0 +1,20 @@
1
+ /**
2
+ * update-quality-scores command — update QUALITY_SCORE.md from scores.
3
+ */
4
+ import { Command } from "commander";
5
/**
 * Build the `update-quality-scores` subcommand.
 *
 * The action loads the update script on demand, runs it, prints its
 * message, and exits with code 1 when the script reports failure.
 *
 * @returns The configured Commander command.
 */
export function createUpdateQualityScoresCommand() {
    const command = new Command("update-quality-scores");
    command.description("Update docs/QUALITY_SCORE.md from score-summary.json");
    return command.action(async () => {
        // Imported here so the script is only loaded when the command runs.
        const scriptModule = await import("../scripts/update-quality-scores.js");
        console.log("=== Updating QUALITY_SCORE.md from score-summary.json ===\n");
        const outcome = scriptModule.updateQualityScores();
        if (!outcome.success) {
            console.error(` ❌ ${outcome.message}`);
            process.exit(1);
        }
        else {
            console.log(` ✅ ${outcome.message}`);
        }
    });
}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * validate-tasks command — standalone validation of repo-based task YAML files.
3
+ *
4
+ * Validates .ailf/tasks/*.yaml files against the RepoTaskSchema without
5
+ * running the full pipeline. Useful for pre-commit hooks and CI checks
6
+ * in external repos.
7
+ *
8
+ * Usage:
9
+ * ailf validate-tasks .ailf/tasks/
10
+ * ailf validate-tasks /path/to/external-repo/.ailf/tasks/
11
+ *
12
+ * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
13
+ * @see packages/eval/src/adapters/task-sources/repo-validation.ts
14
+ */
15
+ import { Command } from "commander";
16
+ export declare function createValidateTasksCommand(): Command;
@@ -0,0 +1,93 @@
1
+ /**
2
+ * validate-tasks command — standalone validation of repo-based task YAML files.
3
+ *
4
+ * Validates .ailf/tasks/*.yaml files against the RepoTaskSchema without
5
+ * running the full pipeline. Useful for pre-commit hooks and CI checks
6
+ * in external repos.
7
+ *
8
+ * Usage:
9
+ * ailf validate-tasks .ailf/tasks/
10
+ * ailf validate-tasks /path/to/external-repo/.ailf/tasks/
11
+ *
12
+ * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
13
+ * @see packages/eval/src/adapters/task-sources/repo-validation.ts
14
+ */
15
+ import { existsSync, readdirSync, readFileSync } from "fs";
16
+ import { resolve, relative } from "path";
17
+ import { Command } from "commander";
18
+ import { load } from "js-yaml";
19
+ import { parseRepoTaskFile } from "../adapters/task-sources/repo-schemas.js";
20
+ import { validateRepoTasks, formatValidationResult, } from "../adapters/task-sources/repo-validation.js";
21
/**
 * Build the `validate-tasks` subcommand.
 *
 * The action lists the non-hidden `.yaml`/`.yml` files in the given
 * directory, parses each one, checks it against the repo task schema, and
 * then runs semantic validation across all tasks that parsed. Problems are
 * reported per file, so one bad file does not stop the others from being
 * checked. With `--strict`, semantic warnings also count as errors.
 *
 * The process always exits explicitly: code 1 when the directory is
 * missing, unreadable or contains no YAML files, or when any error was
 * recorded; code 0 otherwise.
 *
 * @returns The configured Commander command.
 */
export function createValidateTasksCommand() {
    return new Command("validate-tasks")
        .description("Validate repo-based task YAML files (.ailf/tasks/) against the schema")
        .argument("[path]", "Path to tasks directory (default: .ailf/tasks/)", ".ailf/tasks")
        .option("--strict", "Treat warnings as errors", false)
        .action(async (tasksPath, opts) => {
        const resolvedPath = resolve(tasksPath);
        if (!existsSync(resolvedPath)) {
            console.error(`❌ Directory not found: ${resolvedPath}`);
            process.exit(1);
        }
        // existsSync() is also true for regular files; listing one throws
        // ENOTDIR, so report that as a usage error rather than a stack trace.
        let entries;
        try {
            entries = readdirSync(resolvedPath);
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            console.error(`❌ Cannot read tasks directory ${resolvedPath}: ${msg}`);
            process.exit(1);
        }
        // Dot-files are skipped.
        const yamlFiles = entries.filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."));
        if (yamlFiles.length === 0) {
            console.error(`❌ No YAML files found in ${resolvedPath}`);
            process.exit(1);
        }
        console.log(`\nValidating ${yamlFiles.length} task file(s) in ${relative(process.cwd(), resolvedPath)}/\n`);
        let totalTasks = 0;
        let hasErrors = false;
        const allTasks = [];
        for (const file of yamlFiles) {
            const filePath = resolve(resolvedPath, file);
            let raw;
            try {
                raw = readFileSync(filePath, "utf-8");
            }
            catch (err) {
                // e.g. a directory named "x.yaml" or a permission problem:
                // record it and keep validating the remaining files.
                const msg = err instanceof Error ? err.message : String(err);
                console.error(` ❌ ${file}: Could not read file`);
                console.error(` ${msg}\n`);
                hasErrors = true;
                continue;
            }
            let parsed;
            try {
                parsed = load(raw);
            }
            catch (err) {
                const msg = err instanceof Error ? err.message : String(err);
                console.error(` ❌ ${file}: YAML parse error`);
                console.error(` ${msg}\n`);
                hasErrors = true;
                continue;
            }
            if (!Array.isArray(parsed)) {
                console.error(` ❌ ${file}: Expected a YAML array of task definitions`);
                hasErrors = true;
                continue;
            }
            try {
                const tasks = parseRepoTaskFile(parsed, file);
                console.log(` ✅ ${file}: ${tasks.length} task${tasks.length === 1 ? "" : "s"} valid`);
                totalTasks += tasks.length;
                allTasks.push(...tasks);
            }
            catch (err) {
                const msg = err instanceof Error ? err.message : String(err);
                console.error(` ❌ ${file}: Schema validation failed`);
                // Prefix every line of a multi-line schema error.
                console.error(`${msg
                    .split("\n")
                    .map((l) => ` ${l}`)
                    .join("\n")}\n`);
                hasErrors = true;
            }
        }
        // Run semantic validation on all parsed tasks
        if (allTasks.length > 0) {
            console.log(); // blank line
            const semanticResult = validateRepoTasks(allTasks);
            const formatted = formatValidationResult(semanticResult);
            console.log(formatted);
            if (!semanticResult.valid) {
                hasErrors = true;
            }
            if (opts.strict && semanticResult.warnings.length > 0) {
                hasErrors = true;
                console.log("\n ⚠️ --strict mode: warnings treated as errors");
            }
        }
        console.log(`\n${hasErrors ? "❌" : "✅"} ${totalTasks} task${totalTasks === 1 ? "" : "s"} across ${yamlFiles.length} file${yamlFiles.length === 1 ? "" : "s"}\n`);
        process.exit(hasErrors ? 1 : 0);
    });
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * validate command — checks pipeline configuration consistency.
3
+ *
4
+ * Validates all YAML files, task-to-mapping cross-references, reference
5
+ * solutions, context files, and environment variables.
6
+ *
7
+ */
8
+ import { Command } from "commander";
9
+ export declare function createValidateCommand(): Command;