@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530)
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,305 @@
1
+ /**
2
+ * Pipeline action — resolves CLI options and orchestrates pipeline steps.
3
+ *
4
+ * This file is the thin CLI-to-orchestrator bridge. The bulk of execution
5
+ * logic lives in packages/eval/src/orchestration/.
6
+ *
7
+ * Responsibilities:
8
+ * - Resolve CLI flags into typed ResolvedOptions
9
+ * - Delegate to the PipelineOrchestrator for step execution
10
+ *
11
+ * @see packages/eval/src/orchestration/ for the step-based pipeline
12
+ */
13
+ import { writeFileSync } from "fs";
14
+ import { dirname, resolve } from "path";
15
+ import { fileURLToPath } from "url";
16
+ import { classifyUrls } from "../pipeline/classify-url.js";
17
+ import { assessImpact, buildReverseMapping, } from "../pipeline/reverse-mapping.js";
18
+ import { buildAppContext } from "../orchestration/build-app-context.js";
19
+ import { buildStepSequence } from "../orchestration/build-step-sequence.js";
20
+ import { orchestratePipeline } from "../orchestration/pipeline-orchestrator.js";
// ESM modules have no built-in __dirname, so derive it from this module's URL.
const moduleFile = fileURLToPath(import.meta.url);
const __dirname = dirname(moduleFile);
// Two directory levels above this module's directory; handed to the pipeline
// helpers below as the root directory.
const ROOT = resolve(__dirname, "..", "..");
23
+ // ---------------------------------------------------------------------------
24
+ // Valid modes & search modes
25
+ // ---------------------------------------------------------------------------
/** Values accepted for the pipeline `mode` option. */
const VALID_MODES = [
    "baseline",
    "observed",
    "agentic",
    "full",
];
/** Values accepted for the search mode (`--search` flag or EVAL_SEARCH_MODE). */
const VALID_SEARCH_MODES = [
    "open",
    "origin-only",
    "off",
];
/**
 * Read an integer-valued debug environment variable.
 *
 * Returns undefined when the variable is unset or empty, when env-based debug
 * input is disabled, or when the value does not parse as an integer. A
 * malformed value is reported and ignored so that NaN can never reach the
 * resolved debug options (where it would also implicitly enable debug mode).
 */
function readDebugEnvInt(name, envDisabled) {
    const raw = process.env[name];
    if (!raw || envDisabled)
        return undefined;
    const parsed = parseInt(raw, 10);
    if (Number.isNaN(parsed)) {
        console.warn(`⚠️ Ignoring ${name}="${raw}": expected an integer.`);
        return undefined;
    }
    return parsed;
}
/**
 * Option resolution — computes ResolvedOptions from CLI flags and environment
 * variables without writing to process.env, so it is safe to call from
 * --explain.
 *
 * Note that it is not entirely side-effect free: an invalid mode, search mode,
 * header or task source is reported with console.error and terminates the
 * process with exit code 1, and a warning is printed when no evaluation task
 * references any of the changed documents.
 *
 * Exported so the plan builder can call it independently.
 *
 * @param opts Raw CLI options as parsed by Commander.
 * @returns The resolved options object consumed by buildAppContext().
 */
export function computeResolvedOptions(opts) {
    // Validate mode
    const mode = opts.mode;
    if (!VALID_MODES.includes(mode)) {
        console.error(`❌ Invalid mode "${opts.mode}". Must be one of: ${VALID_MODES.join(", ")}`);
        process.exit(1);
    }
    // Debug options — any sub-flag (--debug-n, --debug-pattern, --debug-sample)
    // implies --debug, so users don't need to pass both.
    // When DEBUG_EVAL is explicitly "0", ignore the sub-flags from env.
    // CLI flags (--debug-n, --debug-pattern, --debug-sample) always win.
    const debugEnvDisabled = process.env.DEBUG_EVAL === "0";
    const debugN = opts.debugN ?? readDebugEnvInt("DEBUG_EVAL_N", debugEnvDisabled);
    const debugPattern = opts.debugPattern ??
        (process.env.DEBUG_EVAL_PATTERN && !debugEnvDisabled
            ? process.env.DEBUG_EVAL_PATTERN
            : undefined);
    const debugSample = opts.debugSample ?? readDebugEnvInt("DEBUG_EVAL_SAMPLE", debugEnvDisabled);
    const debugEnabled = opts.debug ||
        process.env.DEBUG_EVAL === "1" ||
        debugN !== undefined ||
        debugPattern !== undefined ||
        debugSample !== undefined;
    const debug = debugEnabled
        ? {
            enabled: true,
            firstN: debugN,
            pattern: debugPattern,
            sample: debugSample,
        }
        : undefined;
    // Search mode validation
    const searchMode = opts.search ?? process.env.EVAL_SEARCH_MODE ?? "open";
    if (!VALID_SEARCH_MODES.includes(searchMode)) {
        console.error(`❌ Invalid --search mode "${searchMode}". Must be one of: ${VALID_SEARCH_MODES.join(", ")}`);
        process.exit(1);
    }
    // Merge repeatable args (singular + plural aliases)
    const urlArgs = [...opts.url, ...opts.urls];
    const headerArgs = [...opts.header, ...opts.headers];
    const allowedOriginArgs = [...opts.allowedOrigin, ...opts.allowedOrigins];
    const sanityDocumentArgs = [...opts.sanityDocument, ...opts.sanityDocuments];
    // Source overrides
    const datasetOverride = opts.sanityDataset;
    const projectIdOverride = opts.sanityProject;
    const perspectiveOverride = opts.sanityPerspective;
    const studioOriginOverride = opts.sanityStudioOrigin;
    // URL classification (pure computation — results captured, not applied to env).
    // Document IDs found in the URLs are appended to the explicit document
    // arguments, de-duplicated while keeping first-seen order.
    if (urlArgs.length > 0) {
        const { documentIds } = classifyUrls(urlArgs);
        if (documentIds.length > 0) {
            const merged = [...new Set([...sanityDocumentArgs, ...documentIds])];
            sanityDocumentArgs.length = 0;
            sanityDocumentArgs.push(...merged);
        }
    }
    // Validate custom headers (early error): each must look like "Key: Value"
    // with a non-empty key.
    for (const h of headerArgs) {
        const colonIdx = h.indexOf(":");
        if (colonIdx === -1) {
            console.error(`❌ Invalid header format: "${h}". Expected "Key: Value".`);
            process.exit(1);
        }
        const key = h.slice(0, colonIdx).trim();
        if (!key) {
            console.error(`❌ Invalid header: empty key in "${h}"`);
            process.exit(1);
        }
    }
    // Auto-infer allowed origin from the first --url (hostname without "www.")
    if (urlArgs.length > 0 && allowedOriginArgs.length === 0) {
        try {
            const hostname = new URL(urlArgs[0]).hostname.replace(/^www\./, "");
            allowedOriginArgs.push(hostname);
        }
        catch {
            // Invalid URL — will be caught later in validation
        }
    }
    // Scoping
    const areaOption = opts.area ?? process.env.EVAL_FILTER_AREAS ?? undefined;
    const taskOption = opts.task ?? process.env.EVAL_FILTER_TASKS ?? undefined;
    const changedDocsOption = opts.changedDocs ?? process.env.EVAL_CHANGED_DOCS ?? undefined;
    // Document-driven scoping (pure — computes impactSummary without env writes)
    let impactSummary;
    if (changedDocsOption) {
        const changedSlugs = changedDocsOption
            .split(",")
            .map((s) => s.trim())
            .filter(Boolean);
        if (changedSlugs.length > 0) {
            const reverseMapping = buildReverseMapping(ROOT);
            impactSummary = assessImpact(changedSlugs, reverseMapping);
            if (impactSummary.areas.length === 0) {
                console.warn(`\n⚠️ No evaluation tasks reference any of the changed documents:`);
                for (const slug of changedSlugs) {
                    console.warn(` - ${slug}`);
                }
                console.warn(`\n Score impact cannot be measured for these documents.\n`);
            }
        }
    }
    // Comparison: --before auto-enables --compare
    const beforeOption = opts.before;
    const compareEnabled = opts.compare || beforeOption !== undefined;
    // Publish: smart default — auto-publish full runs when report store is configured
    const reportStoreToken = process.env.AILF_REPORT_SANITY_API_TOKEN ?? process.env.SANITY_API_TOKEN;
    const reportStoreConfigured = Boolean(reportStoreToken);
    let publishEnabled;
    if (opts.publish !== undefined) {
        // Explicit --publish or --no-publish always wins
        publishEnabled = opts.publish;
    }
    else if (process.env.AILF_PUBLISH === "1") {
        publishEnabled = true;
    }
    else if (process.env.AILF_PUBLISH === "0") {
        publishEnabled = false;
    }
    else {
        // Smart default: non-debug runs auto-publish when store is configured
        publishEnabled = reportStoreConfigured && !debugEnabled;
    }
    // Report store overrides — fall back to the eval dataset so that
    // perspective evaluations publish reports to the same dataset the
    // Studio is reading from. AILF_REPORT_DATASET wins when set explicitly.
    const reportDataset = opts.reportDataset ??
        process.env.AILF_REPORT_DATASET ??
        datasetOverride ??
        undefined;
    const reportProjectId = opts.reportProject ?? process.env.AILF_REPORT_PROJECT_ID ?? undefined;
    return {
        allowedOriginArgs,
        areaOption,
        beforeOption,
        changedDocsOption,
        compareBaseline: opts.compareBaseline,
        compareEnabled,
        compareThreshold: opts.threshold,
        concurrency: opts.concurrency,
        datasetOverride,
        debug,
        discoveryReportEnabled: opts.discoveryReport,
        dryRun: opts.dryRun,
        gapAnalysisEnabled: opts.gapAnalysis,
        graderReplications: opts.graderReplications,
        headerArgs,
        impactSummary,
        mode,
        noCache: !opts.cache,
        noRemoteCache: opts.remoteCache === false,
        outputPath: opts.output,
        perspectiveOverride,
        projectIdOverride,
        promptfooUrl: opts.promptfooUrl,
        publishEnabled,
        publishTag: opts.publishTag,
        readinessEnabled: opts.readiness,
        reportDataset,
        reportProjectId,
        sanityDocumentArgs,
        searchMode,
        skipEval: opts.skipEval,
        skipFetch: opts.skipFetch,
        source: opts.source,
        studioOriginOverride,
        repoTasksPath: opts.repoTasksPath,
        taskOption,
        taskSourceType: resolveTaskSourceType(opts.taskSource),
        urlArgs,
    };
}
/**
 * Resolve and validate the --task-source flag value.
 *
 * A missing/empty value or "content-lake" resolves to undefined (the default
 * source, Content Lake); "yaml" is returned unchanged. Any other value is
 * reported on stderr and the process exits with status 1.
 */
function resolveTaskSourceType(raw) {
    const requested = raw || "content-lake";
    switch (requested) {
        case "content-lake":
            return undefined; // default — Content Lake
        case "yaml":
            return "yaml";
        default:
            console.error(`❌ Invalid --task-source "${raw}". Must be "yaml" or "content-lake".`);
            process.exit(1);
    }
}
224
+ // ---------------------------------------------------------------------------
225
+ // Pipeline entry point
226
+ // ---------------------------------------------------------------------------
/**
 * Execute the evaluation pipeline. Always ends by terminating the process:
 * exit code 0 on success (or a valid dry run), 1 on failure.
 *
 * 1. Verify the --config file exists (when given)
 * 2. Resolve CLI options into typed ResolvedOptions
 * 3. On --dry-run: validate configuration and environment, then exit
 * 4. Build AppContext (composition root wires adapters); when --config is
 *    given, its resolved config replaces the context's config
 * 5. Build step sequence from context
 * 6. Delegate to the PipelineOrchestrator and write the pipeline result
 *
 * @param cliOpts Raw CLI options as parsed by Commander.
 */
export async function executePipeline(cliOpts) {
    // Fail fast on a missing --config file before doing any other work.
    if (cliOpts.config) {
        const { existsSync } = await import("fs");
        if (!existsSync(cliOpts.config)) {
            console.error(`❌ Config file not found: ${cliOpts.config}`);
            process.exit(1);
        }
    }
    const options = computeResolvedOptions(cliOpts);
    // Dry-run: validate only, don't execute steps. This is checked before the
    // --config handling so that --dry-run is honoured for config-file runs too
    // instead of silently executing the full pipeline.
    if (options.dryRun) {
        await runDryRunValidation();
        return;
    }
    // When --config is provided, resolve the pipeline config from the file.
    let fileConfig;
    if (cliOpts.config) {
        const { FileConfigAdapter } = await import("../adapters/config-sources/file-config-adapter.js");
        const adapter = new FileConfigAdapter(cliOpts.config, ROOT);
        fileConfig = await adapter.resolve();
    }
    const baseCtx = buildAppContext(options, ROOT);
    // Override config with the file-based config when one was loaded.
    const ctx = cliOpts.config ? { ...baseCtx, config: fileConfig } : baseCtx;
    const pipelineStart = Date.now();
    const steps = buildStepSequence(ctx, pipelineStart);
    const result = await orchestratePipeline(ctx, steps);
    writePipelineResult(result);
    process.exit(result.success ? 0 : 1);
}
/**
 * Dry-run handler: runs configuration and environment checks, prints the
 * outcome and exits the process (1 when any issue has severity "error",
 * otherwise 0). No pipeline steps are executed.
 */
async function runDryRunValidation() {
    const { validateConfiguration } = await import("../pipeline/validate.js");
    const { checkEnvironment } = await import("../pipeline/checks.js");
    const validation = validateConfiguration(ROOT);
    const envIssues = checkEnvironment(ROOT);
    validation.issues.push(...envIssues);
    const errors = validation.issues.filter((i) => i.severity === "error");
    if (errors.length > 0) {
        console.error("❌ Configuration validation failed:\n");
        for (const e of errors) {
            console.error(` ERROR [${e.source}] ${e.message}`);
        }
        process.exit(1);
    }
    console.log("\n ✅ Configuration is valid");
    console.log(" Pipeline configuration is valid. No steps were executed.");
    console.log(" Remove --dry-run to execute the full pipeline.\n");
    process.exit(0);
}
287
+ // ---------------------------------------------------------------------------
288
+ // Internal helpers
289
+ // ---------------------------------------------------------------------------
/**
 * Internal alias used by the pipeline entry point: turns the raw CLI options
 * into ResolvedOptions by delegating to computeResolvedOptions().
 */
function resolveOptions(opts) {
    const resolved = computeResolvedOptions(opts);
    return resolved;
}
/**
 * Best-effort write of the pipeline result as pretty-printed JSON to
 * results/latest/pipeline-result.json under ROOT.
 *
 * Never throws. A missing results/latest/ directory is expected and ignored
 * silently; any other failure (permissions, unserializable result, ...) is
 * reported as a warning so it does not go unnoticed.
 */
function writePipelineResult(result) {
    const resultFile = resolve(ROOT, "results", "latest", "pipeline-result.json");
    try {
        writeFileSync(resultFile, JSON.stringify(result, null, 2));
        console.log(` 📄 Pipeline result: ${resultFile}\n`);
    }
    catch (err) {
        // results/latest/ may not exist yet — not critical
        const missingDir = typeof err === "object" && err !== null && "code" in err && err.code === "ENOENT";
        if (missingDir)
            return;
        const reason = err instanceof Error ? err.message : String(err);
        console.warn(` ⚠️ Could not write pipeline result to ${resultFile}: ${reason}\n`);
    }
}
@@ -0,0 +1,62 @@
1
+ /**
2
+ * pipeline command — the main evaluation pipeline orchestrator.
3
+ *
4
+ * Defines all 36+ CLI flags via Commander, resolves them into a typed
5
+ * options object, bridges to process.env for downstream modules, and
6
+ * delegates to executePipeline() in pipeline-action.js.
7
+ *
8
+ * @see docs/API.md for the full flag reference.
9
+ */
10
+ import { Command } from "commander";
/**
 * Raw CLI options for the `pipeline` command, exactly as Commander parses them
 * (kebab-case flags become camelCase properties). Nothing here is validated or
 * merged yet; that happens when the options are resolved.
 */
export interface PipelineCliOptions {
    // --- Mode, source and config file ---
    /** `--mode`: evaluation mode. */
    mode: string;
    /** `--source`: documentation source name. */
    source?: string;
    /** `--config`: path to a pipeline config file. */
    config?: string;

    // --- Execution control ---
    /** `--dry-run`: validate configuration only. */
    dryRun: boolean;
    /** `--skip-fetch`: reuse cached documentation contexts. */
    skipFetch: boolean;
    /** `--skip-eval`: recalculate from existing eval results. */
    skipEval: boolean;
    /** False when `--no-cache` is passed. */
    cache: boolean;
    /** False when `--no-remote-cache` is passed. */
    remoteCache?: boolean;
    /** `--concurrency`: max parallel API calls during evaluation. */
    concurrency?: number;
    /** `--grader-replications`: grader consistency replications. */
    graderReplications?: number;

    // --- Scoping ---
    /** `--area`: comma-separated feature areas. */
    area?: string;
    /** `--task`: a specific task ID. */
    task?: string;
    /** `--changed-docs`: document slugs used to auto-scope tasks. */
    changedDocs?: string;

    // --- Comparison ---
    /** `--before`: before-state for impact evaluation. */
    before?: string;
    /** `--compare`: compare scores against the latest baseline. */
    compare: boolean;
    /** `--compare-baseline`: specific baseline file to compare. */
    compareBaseline?: string;
    /** `--threshold`: noise threshold for comparison. */
    threshold?: number;

    // --- Analysis and reports ---
    /** False when `--no-gap-analysis` is passed. */
    gapAnalysis: boolean;
    /** `--readiness`: generate the launch readiness checklist. */
    readiness: boolean;
    /** `--discovery-report`: generate the agent discoverability report. */
    discoveryReport: boolean;

    // --- Publishing ---
    /** `--publish` / `--no-publish`; undefined when neither flag is given. */
    publish?: boolean;
    /** `--publish-tag`: label for the published report. */
    publishTag?: string;
    /** `--report-dataset`: Sanity dataset for the report store. */
    reportDataset?: string;
    /** `--report-project`: Sanity project ID for the report store. */
    reportProject?: string;

    // --- Output ---
    /** `--output`: file to write the PR comment markdown to. */
    output?: string;
    /** `--promptfoo-url`: Promptfoo share URL for the report. */
    promptfooUrl?: string;

    // --- Task definitions ---
    /** `--task-source`: task definition source. */
    taskSource?: string;
    /** `--repo-tasks-path`: path to repo-based task definitions. */
    repoTasksPath?: string;

    // --- Debug options ---
    debug: boolean;
    debugN?: number;
    debugPattern?: string;
    debugSample?: number;

    // --- Sanity source overrides ---
    sanityDataset?: string;
    sanityPerspective?: string;
    sanityProject?: string;
    sanityStudioOrigin?: string;

    // --- Search mode and repeatable flags ---
    // Each repeatable flag exists in a singular and a plural spelling; both
    // arrays are kept separately here.
    /** Search mode. */
    search?: string;
    allowedOrigin: string[];
    allowedOrigins: string[];
    header: string[];
    headers: string[];
    sanityDocument: string[];
    sanityDocuments: string[];
    url: string[];
    urls: string[];
}
/**
 * Create the `pipeline` Commander command.
 *
 * @returns The command, ready to be registered on a parent program.
 */
export declare function createPipelineCommand(): Command;
@@ -0,0 +1,53 @@
1
+ /**
2
+ * pipeline command — the main evaluation pipeline orchestrator.
3
+ *
4
+ * Defines all 36+ CLI flags via Commander, resolves them into a typed
5
+ * options object, bridges to process.env for downstream modules, and
6
+ * delegates to executePipeline() in pipeline-action.js.
7
+ *
8
+ * @see docs/API.md for the full flag reference.
9
+ */
10
+ import { Command } from "commander";
11
+ import { addAgenticOptions, addDebugOptions, addSanitySourceOptions, } from "./shared/options.js";
/**
 * Build the `pipeline` Commander command: registers the pipeline flags plus
 * the shared debug, Sanity-source and agentic option groups. The action
 * lazily imports pipeline-action.js and hands the parsed options to
 * executePipeline().
 *
 * @returns The configured Commander command.
 */
export function createPipelineCommand() {
    // Commander calls custom option parsers as (value, previousValue). Passing
    // the global parseInt directly would make previousValue the radix, so a
    // repeated flag (e.g. `-j 4 -j 8`) could parse incorrectly. Always use base 10.
    const parseIntArg = (value) => parseInt(value, 10);
    const cmd = new Command("pipeline")
        .description("Run the full evaluation pipeline")
        .option("-m, --mode <mode>", "Evaluation mode: full (default — floor + ceiling + actual), baseline (floor + ceiling only), agentic (actual only), observed", "full")
        .option("-s, --source <name>", "Documentation source name (from sources.yaml)")
        .option("-n, --dry-run", "Validate configuration only, no execution", false)
        .option("--skip-fetch", "Reuse cached documentation contexts", false)
        .option("--skip-eval", "Recalculate from existing eval results", false)
        .option("--no-cache", "Bypass all pipeline-level caching")
        .option("--no-remote-cache", "Disable Content Lake cache lookup (local cache still active)")
        .option("-a, --area <areas>", "Scope to feature areas (comma-separated)")
        .option("-t, --task <id>", "Scope to specific task ID")
        .option("--changed-docs <slugs>", "Auto-scope to tasks affected by these document slugs")
        .option("-j, --concurrency <n>", "Max parallel API calls during evaluation", parseIntArg)
        .option("--grader-replications <n>", "Grader consistency replications", parseIntArg)
        .option("--before <source>", "Before-state for impact evaluation")
        .option("-c, --compare", "Compare scores against latest baseline", false)
        .option("--compare-baseline <path>", "Specific baseline file to compare")
        .option("--threshold <n>", "Noise threshold for comparison (default: 2)", parseFloat)
        .option("--no-gap-analysis", "Skip failure mode + impact analysis")
        .option("--readiness", "Generate launch readiness checklist", false)
        .option("--discovery-report", "Generate agent discoverability report", false)
        .option("-p, --publish", "Write report to Sanity + fan out to sinks (auto-enabled for full runs when report store is configured)")
        .option("--no-publish", "Suppress auto-publishing")
        .option("--publish-tag <tag>", "Label for published report")
        .option("--report-dataset <name>", "Sanity dataset for report store")
        .option("--report-project <id>", "Sanity project ID for report store")
        .option("--config <path>", "Load pipeline config from a JSON/YAML file (overrides most CLI flags)")
        .option("-o, --output <path>", "Write PR comment markdown to file")
        .option("--promptfoo-url <url>", "Promptfoo share URL for report")
        .option("--task-source <type>", "Task definition source: content-lake (default — Sanity Content Lake), yaml (tasks/*.yaml files, legacy)", "content-lake")
        .option("--repo-tasks-path <path>", "Path to repo-based task definitions (.ailf/tasks/ directory)")
        .action(async (opts) => {
        // Loaded lazily so the action module is only imported when the command runs.
        const { executePipeline } = await import("./pipeline-action.js");
        await executePipeline(opts);
    });
    // Add shared option groups
    addDebugOptions(cmd);
    addSanitySourceOptions(cmd);
    addAgenticOptions(cmd);
    return cmd;
}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * pr-comment command — generate a PR comment from evaluation scores.
3
+ *
4
+ * Uses the composition root to wire adapters, then calls
5
+ * generatePrComment() directly from pipeline/.
6
+ */
7
+ import { Command } from "commander";
/**
 * Create the `pr-comment` Commander command.
 *
 * @returns The command, ready to be registered on a parent program.
 */
export declare function createPrCommentCommand(): Command;
@@ -0,0 +1,47 @@
1
+ /**
2
+ * pr-comment command — generate a PR comment from evaluation scores.
3
+ *
4
+ * Uses the composition root to wire adapters, then calls
5
+ * generatePrComment() directly from pipeline/.
6
+ */
7
+ import { dirname, resolve } from "path";
8
+ import { fileURLToPath } from "url";
9
+ import { Command } from "commander";
10
+ import { createAppContext } from "../composition-root.js";
11
+ import { generatePrComment } from "../pipeline/pr-comment.js";
// ESM modules have no built-in __dirname, so derive it from this module's URL.
const moduleFile = fileURLToPath(import.meta.url);
const __dirname = dirname(moduleFile);
// Two directory levels above this module's directory; used as the rootDir of
// the app context created below.
const ROOT = resolve(__dirname, "..", "..");
/**
 * Build the `pr-comment` Commander command.
 *
 * The action creates an app context through the composition root with
 * fetching, evaluation, comparison, analysis, publishing and caching all
 * switched off, then calls generatePrComment() with the CLI output path,
 * the Promptfoo URL and the context's root directory.
 *
 * On failure the process exit code is set to 1 and the error is printed —
 * including thrown values that are not Error instances, which would otherwise
 * fail without any message.
 *
 * @returns The configured Commander command.
 */
export function createPrCommentCommand() {
    return new Command("pr-comment")
        .description("Generate a markdown PR comment from evaluation scores")
        .option("--output <path>", "Write comment to file (default: stdout)")
        .option("--promptfoo-url <url>", "Promptfoo share URL to include")
        .action(async (opts) => {
        try {
            const ctx = createAppContext({
                rootDir: ROOT,
                mode: "baseline",
                skipFetch: true,
                skipEval: true,
                compareEnabled: false,
                gapAnalysisEnabled: false,
                readinessEnabled: false,
                discoveryReportEnabled: false,
                publishEnabled: false,
                noCache: true,
                noRemoteCache: true,
                searchMode: "open",
            });
            generatePrComment({
                outputPath: opts.output,
                promptfooUrl: opts.promptfooUrl,
                rootDir: ctx.config.rootDir,
            });
        }
        catch (err) {
            process.exitCode = 1;
            // Always say why the command failed, whatever was thrown.
            console.error(err instanceof Error ? err.message : String(err));
        }
    });
}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * publish command — manually publish a local evaluation report to the
3
+ * Sanity Content Lake without re-running the pipeline.
4
+ *
5
+ * Reads a score-summary.json (defaulting to results/latest/score-summary.json),
6
+ * builds provenance, writes the report to Sanity, and fans out to configured
7
+ * sinks — exactly the same as the publish step in `ailf pipeline`, but
8
+ * standalone.
9
+ *
10
+ * Uses createAppContext() (composition root) for all infrastructure access.
11
+ *
12
+ * @example
13
+ * ailf publish # default path
14
+ * ailf publish ./my-results/score-summary.json # custom path
15
+ * ailf publish --tag "manual-2026-03-13" # with a label
16
+ * ailf publish --dry-run # preview without writing
17
+ *
18
+ * @see packages/eval/src/composition-root.ts
19
+ * @see docs/design-docs/report-store/architecture.md
20
+ */
21
+ import { Command } from "commander";
/** Options accepted by the `publish` command. */
export interface PublishCommandOptions {
    /** `--tag`: label for the published report. */
    tag?: string;
    /** `--dry-run`: preview without writing. */
    dryRun: boolean;
}
/**
 * Create the `publish` Commander command.
 *
 * @returns The command, ready to be registered on a parent program.
 */
export declare function createPublishCommand(): Command;