@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530)
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,153 @@
1
+ /**
2
+ * Pipeline orchestrator — sequences steps and short-circuits on failure.
3
+ *
4
+ * This replaces the 1,672-line executePipeline() in pipeline-action.ts
5
+ * with declarative step sequencing. Each step is run through the
6
+ * StepRunner which provides uniform timing, precondition checking,
7
+ * error handling, and logging.
8
+ *
9
+ * When a jobId is present in the config (API-triggered evaluations),
10
+ * the orchestrator emits progress updates to the Content Lake after
11
+ * each step completes. This enables the GET /v1/jobs/:jobId polling
12
+ * endpoint to show real-time progress.
13
+ */
14
+ import { runStep } from "./step-runner.js";
15
+ // ---------------------------------------------------------------------------
16
+ // Job progress reporter
17
+ // ---------------------------------------------------------------------------
/**
 * Report job progress to the Content Lake by patching the job document
 * through a JobStore. Best-effort — failures are logged as warnings
 * (including the underlying cause) and never block the pipeline.
 *
 * Does nothing when `ctx.config.jobId` is not set.
 *
 * @param ctx - Application context; reads `ctx.config.jobId` and `ctx.logger`.
 * @param stepName - Step name stored as `progress.currentStep`.
 * @param completedSteps - Number of steps finished so far.
 * @param totalSteps - Total number of steps in the pipeline.
 * @param status - Job status to write.
 * @param errorInfo - Optional error payload; when present it is stored as
 *   `error` and `completedAt` is stamped with the current time.
 */
async function reportJobProgress(ctx, stepName, completedSteps, totalSteps, status, errorInfo) {
    const jobId = ctx.config.jobId;
    if (!jobId)
        return;
    try {
        // Dynamic import to avoid circular deps — the job store is a
        // lightweight module that only needs a Sanity token.
        const { JobStore } = await import("../job-store.js");
        const store = new JobStore({
            token: process.env.AILF_REPORT_SANITY_API_TOKEN ??
                process.env.SANITY_API_TOKEN ??
                undefined,
        });
        const update = {
            status,
            progress: {
                currentStep: stepName,
                completedSteps,
                totalSteps,
            },
        };
        // Stamp the start time while no step has finished yet. Stamping at
        // completedSteps === 1 recorded the time *after* the first step, was
        // overwritten again when the second step started, and was skipped
        // entirely when the first step failed.
        if (status === "running" && completedSteps === 0) {
            update.startedAt = new Date().toISOString();
        }
        if (errorInfo) {
            update.error = errorInfo;
            update.completedAt = new Date().toISOString();
        }
        await store.updateJob(jobId, update);
    }
    catch (err) {
        // Best effort — progress reporting should never block the pipeline,
        // but the cause is surfaced so broken tokens/config can be diagnosed.
        const reason = err instanceof Error ? err.message : String(err);
        ctx.logger.warn(`Failed to report job progress for step "${stepName}" — continuing (${reason})`);
    }
}
61
+ // ---------------------------------------------------------------------------
62
+ // Orchestrator
63
+ // ---------------------------------------------------------------------------
/**
 * Run a sequence of pipeline steps, short-circuiting on required step failure.
 *
 * Optional steps (step.optional === true) may fail without stopping
 * the pipeline. Required step failures cause an immediate abort.
 *
 * When ctx.config.jobId is set, progress is written to the job document
 * before and after each step, followed by a final "failed" or "completed"
 * update. Job reporting is best-effort and never changes the outcome.
 *
 * An exception escaping the step runner is treated as a failure of that
 * step, so the job is still marked "failed" instead of staying "running".
 *
 * @param ctx - Application context (reads ctx.config.jobId, uses ctx.logger).
 * @param steps - Steps to run, in order.
 * @returns `{ durationMs, steps, success, validation }`, where `steps` maps
 *   each executed step's name to its result.
 */
export async function orchestratePipeline(ctx, steps) {
    const results = {};
    const state = {};
    const validation = { issues: [], valid: true };
    const pipelineStart = Date.now();
    const hasJob = !!ctx.config.jobId;
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    ctx.logger.section("ai-literacy-framework — Evaluation Pipeline");
    // Report initial running status
    if (hasJob) {
        await reportJobProgress(ctx, steps[0]?.name ?? "init", 0, steps.length, "running");
    }
    for (let i = 0; i < steps.length; i++) {
        const step = steps[i];
        ctx.logger.section(step.name);
        // Report current step progress
        if (hasJob) {
            await reportJobProgress(ctx, step.name, i, steps.length, "running");
        }
        const stepStart = Date.now();
        let result;
        try {
            result = await runStep(step, ctx, state);
        }
        catch (err) {
            // The runner normally returns failures as results; if it rejects
            // anyway, fold the error into a failed result so the abort and
            // job-failure reporting below still happen.
            result = {
                durationMs: Date.now() - stepStart,
                error: describe(err),
                status: "failed",
            };
            ctx.logger.error(`${step.name} threw unexpectedly: ${result.error}`);
        }
        results[step.name] = result;
        // Record an invalid run when the validate step fails
        if (step.name === "validate" && result.status === "failed") {
            validation.valid = false;
        }
        // Fail fast on required step failure
        if (result.status === "failed" && !step.optional) {
            ctx.logger.error(`Pipeline aborted: ${step.name} failed`);
            // Report failure to job store
            if (hasJob) {
                await reportJobProgress(ctx, step.name, i + 1, steps.length, "failed", {
                    message: result.error ?? `${step.name} failed`,
                    step: step.name,
                });
            }
            return {
                durationMs: Date.now() - pipelineStart,
                steps: results,
                success: false,
                validation,
            };
        }
        // Report step completion
        if (hasJob) {
            await reportJobProgress(ctx, step.name, i + 1, steps.length, "running");
        }
    }
    const durationMs = Date.now() - pipelineStart;
    ctx.logger.section("Pipeline Complete");
    ctx.logger.info(`All steps completed in ${durationMs}ms`);
    // Report completion to job store (with reportId from state if available)
    if (hasJob) {
        try {
            const { JobStore } = await import("../job-store.js");
            const store = new JobStore({
                token: process.env.AILF_REPORT_SANITY_API_TOKEN ??
                    process.env.SANITY_API_TOKEN ??
                    undefined,
            });
            await store.updateJob(ctx.config.jobId, {
                status: "completed",
                completedAt: new Date().toISOString(),
                progress: {
                    currentStep: "complete",
                    completedSteps: steps.length,
                    totalSteps: steps.length,
                },
                ...(state.reportId ? { reportId: state.reportId } : {}),
            });
        }
        catch (err) {
            // Best effort — the pipeline result stands even if the job
            // document could not be updated.
            ctx.logger.warn(`Failed to report job completion — continuing (${describe(err)})`);
        }
    }
    return {
        durationMs,
        steps: results,
        success: true,
        validation,
    };
}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Uniform step execution with timing, logging, precondition checking,
3
+ * and cache integration.
4
+ *
5
+ * The StepRunner is the single place where cache lookup/record happens.
6
+ * Individual steps declare their cache inputs via cacheInputs(); the
7
+ * runner handles the rest.
8
+ */
9
+ import type { AppContext, PipelineState, PipelineStep, StepResult } from "../_vendor/ailf-core/index.d.ts";
10
+ /**
11
+ * Execute a single pipeline step with uniform middleware.
12
+ *
13
+ * Order of operations:
14
+ * 1. Precondition check → fail fast on errors
15
+ * 2. Cache lookup (if ctx.cache + step.cacheInputs + !noCache)
16
+ * 3. Execute step logic
17
+ * 4. Log result (via ctx.logger.step)
18
+ * 5. Cache record (on success)
+ *
+ * Failures are reported through the returned StepResult rather than by
+ * rejecting: error-severity precondition issues and exceptions thrown by
+ * step.execute() both yield a result with status "failed". A cache hit
+ * yields a "success" result whose summary starts with "Skipped (cached)"
+ * and step.execute() is not called. Errors raised by the cache lookup or
+ * cache record are ignored.
+ *
+ * @param step - The step to run.
+ * @param ctx - Application context; the runner uses ctx.logger, ctx.cache
+ *   and ctx.config.noCache.
+ * @param state - State object handed to step.execute(); defaults to a new
+ *   empty object when omitted.
+ * @returns The step's result. Non-skipped results from execute() have
+ *   durationMs replaced by the time measured by the runner.
19
+ */
20
+ export declare function runStep(step: PipelineStep, ctx: AppContext, state?: PipelineState): Promise<StepResult>;
@@ -0,0 +1,88 @@
1
+ /**
2
+ * Uniform step execution with timing, logging, precondition checking,
3
+ * and cache integration.
4
+ *
5
+ * The StepRunner is the single place where cache lookup/record happens.
6
+ * Individual steps declare their cache inputs via cacheInputs(); the
7
+ * runner handles the rest.
8
+ */
/**
 * Execute a single pipeline step with uniform middleware.
 *
 * Order of operations:
 * 1. Precondition check → fail fast on errors (a check() that throws is
 *    reported as a failed precondition instead of rejecting)
 * 2. Cache lookup (if ctx.cache + step.cacheInputs + !noCache)
 * 3. Execute step logic
 * 4. Log result (via ctx.logger.step)
 * 5. Cache record (on success)
 *
 * Step failures are returned as a StepResult with status "failed"; cache
 * lookup/record errors are deliberately non-fatal and ignored.
 *
 * @param step - The step to run (name, check, execute, optional cacheInputs).
 * @param ctx - Application context; uses ctx.logger, ctx.cache and
 *   ctx.config.noCache.
 * @param state - State object handed to step.execute(); defaults to `{}`.
 * @returns The step's result. Non-skipped results get durationMs replaced
 *   by the time measured here.
 */
export async function runStep(step, ctx, state = {}) {
    const start = Date.now();
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    // Build, log and return a failed result in one place.
    const fail = (error) => {
        const result = {
            durationMs: Date.now() - start,
            error,
            status: "failed",
        };
        ctx.logger.step(step.name, result);
        return result;
    };
    // 1. Precondition check
    let issues;
    try {
        issues = step.check(ctx);
    }
    catch (err) {
        // A broken check must not bypass the uniform failure handling.
        return fail(`Precondition check threw: ${describe(err)}`);
    }
    const errors = issues.filter((i) => i.severity === "error");
    if (errors.length > 0) {
        return fail(`Precondition failed: ${errors.map((e) => e.message).join("; ")}`);
    }
    // 2. Cache lookup
    const canCache = ctx.cache && !ctx.config.noCache && typeof step.cacheInputs === "function";
    let cacheKey;
    if (canCache) {
        try {
            const inputs = step.cacheInputs(ctx);
            const key = await ctx.cache.computeKey(inputs);
            cacheKey = key;
            const cached = await ctx.cache.lookup(step.name, key);
            if (cached.hit) {
                const result = {
                    durationMs: Date.now() - start,
                    status: "success",
                    summary: `Skipped (cached) — ${cached.entry.summary}`,
                };
                ctx.logger.step(step.name, result);
                return result;
            }
        }
        catch {
            // Cache lookup failure is non-fatal — proceed to execute
        }
    }
    // 3. Execute
    try {
        const result = await step.execute(ctx, state);
        // Ensure timing is consistent (step may or may not track its own duration)
        const timed = result.status === "skipped"
            ? result
            : { ...result, durationMs: Date.now() - start };
        // 4. Log result
        ctx.logger.step(step.name, timed);
        // 5. Cache record (on success); cacheKey is only set when the key
        // was computed successfully during lookup.
        if (canCache && cacheKey && timed.status === "success") {
            try {
                await ctx.cache.record(step.name, cacheKey, {
                    durationMs: timed.durationMs,
                    outputPaths: [],
                    summary: timed.summary ?? step.name,
                });
            }
            catch {
                // Cache record failure is non-fatal
            }
        }
        return timed;
    }
    catch (err) {
        return fail(describe(err));
    }
}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Pipeline step: Calculate AI Literacy Scores from eval results.
3
+ *
4
+ * Calls calculateAndWriteScores() from pipeline/calculate-scores.ts with
5
+ * typed options derived from AppContext. No env bridge needed.
6
+ */
7
+ import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
8
/**
 * Pipeline step that calculates AI Literacy Scores from eval results.
 * It declares no `optional` flag, so the orchestrator treats a failure
 * of this step as fatal.
 */
+ export declare class CalculateScoresStep implements PipelineStep {
9
/** Step identifier; also the key under which the orchestrator stores the result. */
+ readonly name = "calculate-scores";
10
/** Step-level precondition hook; the implementation always returns an empty list. */
+ check(): ValidationIssue[];
11
/**
 * Verifies the results file exists, runs score calculation, then validates
 * the written score summary. Resolves to a "failed" result (never rejects
 * for these cases) when results are missing, scoring throws, or the
 * summary check reports errors; otherwise resolves to "success".
 */
+ execute(ctx: AppContext): Promise<StepResult>;
12
/** Cache inputs: the paths returned by getStepInputPaths(rootDir, "calculate-scores"). */
+ cacheInputs(ctx: AppContext): string[];
13
+ }
@@ -0,0 +1,95 @@
1
+ /**
2
+ * Pipeline step: Calculate AI Literacy Scores from eval results.
3
+ *
4
+ * Calls calculateAndWriteScores() from pipeline/calculate-scores.ts with
5
+ * typed options derived from AppContext. No env bridge needed.
6
+ */
7
+ import { join } from "path";
8
+ import { getStepInputPaths } from "../../pipeline/cache.js";
9
+ import { calculateAndWriteScores } from "../../pipeline/calculate-scores.js";
10
+ import { checkResultsExist, checkScoreSummaryValid, } from "../../pipeline/checks.js";
11
+ import { RESULTS_FILES } from "../../pipeline/eval-constants.js";
12
+ import { loadSource } from "../../sources.js";
13
+ import { configToSourceOverrides } from "../config-to-source-overrides.js";
/**
 * Pipeline step that turns eval results into AI Literacy Scores.
 *
 * Flow: confirm the results file for the active mode exists, resolve the
 * documentation source (best-effort), run the score calculation, then
 * confirm a valid score summary was written.
 */
export class CalculateScoresStep {
    name = "calculate-scores";
    /** No step-level preconditions; the results check happens in execute(). */
    check() {
        return [];
    }
    /**
     * Run the score calculation.
     *
     * @param ctx - Application context; only ctx.config is read.
     * @returns A "success" result, or a "failed" result describing the
     *   missing results, the scoring error, or the failed postcondition.
     */
    async execute(ctx) {
        const startedAt = Date.now();
        const failed = (error) => ({
            durationMs: Date.now() - startedAt,
            error,
            status: "failed",
        });
        const onlyErrors = (issues) => issues.filter((issue) => issue.severity === "error");
        const joinMessages = (issues) => issues.map((issue) => issue.message).join("; ");
        // "full" runs are scored from the baseline results file.
        const primaryMode = ctx.config.mode === "full" ? "baseline" : ctx.config.mode;
        const resultsFile = RESULTS_FILES[primaryMode];
        // Precondition: results file exists
        const missingResults = onlyErrors(checkResultsExist(ctx.config.rootDir, resultsFile));
        if (missingResults.length > 0) {
            return failed(`Results missing: ${joinMessages(missingResults)}`);
        }
        // Resolve the source once; scoring proceeds without source metadata
        // when resolution fails.
        const overrides = configToSourceOverrides(ctx.config);
        let resolvedSource;
        try {
            resolvedSource = loadSource(ctx.config.source, overrides);
        }
        catch {
            // Non-fatal
        }
        try {
            calculateAndWriteScores({
                allowedOrigins: ctx.config.allowedOrigins,
                mode: ctx.config.mode,
                resolvedSource,
                resultsPath: primaryMode === "baseline"
                    ? undefined
                    : join(ctx.config.rootDir, resultsFile),
                rootDir: ctx.config.rootDir,
                searchMode: ctx.config.searchMode,
                source: ctx.config.source,
            });
        }
        catch (err) {
            const carriesStatus = err !== null && typeof err === "object" && "status" in err;
            const code = carriesStatus ? err.status : undefined;
            if (code === undefined) {
                return failed(`calculate-scores failed: ${err instanceof Error ? err.message : String(err)}`);
            }
            if (code !== 1) {
                return failed(`calculate-scores failed with exit code ${code}`);
            }
            // A status of exactly 1 is tolerated: fall through and let the
            // postcondition decide whether usable output was produced.
        }
        // Postcondition: score summary exists and is valid
        const summaryErrors = onlyErrors(checkScoreSummaryValid(ctx.config.rootDir));
        if (summaryErrors.length > 0) {
            return failed(`Postcondition failed: ${joinMessages(summaryErrors)}`);
        }
        return {
            durationMs: Date.now() - startedAt,
            status: "success",
            summary: "Scores calculated and summary written",
        };
    }
    /** Cache inputs for this step, as listed by the pipeline cache module. */
    cacheInputs(ctx) {
        return getStepInputPaths(ctx.config.rootDir, "calculate-scores");
    }
}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Pipeline step: Deliver results to a callback URL.
3
+ *
4
+ * After the pipeline completes and the report is published, this step
5
+ * POSTs the results to the caller's callback URL. Used for API-triggered
6
+ * evaluations where the caller wants push-based result delivery.
7
+ *
8
+ * This step is always optional — callback failure never blocks the pipeline.
9
+ * The result is already in the Content Lake (system of record).
10
+ *
11
+ * @see packages/eval/src/pipeline/callback-delivery.ts
12
+ * @see docs/design-docs/api-service-gateway.md
13
+ */
14
+ import type { AppContext, PipelineState, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
15
+ import { type CallbackConfig } from "../../pipeline/callback-delivery.js";
16
/**
 * Optional pipeline step that POSTs the evaluation results to a
 * caller-supplied callback URL.
 */
+ export declare class CallbackStep implements PipelineStep {
17
/** Callback configuration supplied to the constructor; check() requires its `url`. */
+ private readonly callback;
18
/** Job identifier forwarded in the callback payload; may be undefined. */
+ private readonly jobId?;
19
/** Step identifier. */
+ readonly name = "callback-delivery";
20
/** Always true: a failed delivery does not abort the pipeline. */
+ readonly optional = true;
21
/** Stores the callback configuration and optional job id for later delivery. */
+ constructor(callback: CallbackConfig, jobId?: string | undefined);
22
/** Returns a single error issue when the callback URL is missing, otherwise an empty list. */
+ check(): ValidationIssue[];
23
/**
 * Reads results/latest/score-summary.json under ctx.config.rootDir and
 * delivers it together with state.reportId and the job id. Resolves to
 * "failed" when the summary cannot be read or delivery does not succeed.
 */
+ execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
24
+ }
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Pipeline step: Deliver results to a callback URL.
3
+ *
4
+ * After the pipeline completes and the report is published, this step
5
+ * POSTs the results to the caller's callback URL. Used for API-triggered
6
+ * evaluations where the caller wants push-based result delivery.
7
+ *
8
+ * This step is always optional — callback failure never blocks the pipeline.
9
+ * The result is already in the Content Lake (system of record).
10
+ *
11
+ * @see packages/eval/src/pipeline/callback-delivery.ts
12
+ * @see docs/design-docs/api-service-gateway.md
13
+ */
14
+ import { readFileSync } from "fs";
15
+ import { resolve } from "path";
16
+ import { deliverCallback, } from "../../pipeline/callback-delivery.js";
/**
 * Optional pipeline step that pushes the finished evaluation to a
 * caller-supplied callback URL.
 *
 * A failed delivery produces a "failed" step result, but because the step
 * is optional the orchestrator keeps going.
 */
export class CallbackStep {
    callback;
    jobId;
    name = "callback-delivery";
    optional = true;
    /**
     * @param callback - Callback configuration; its `url` is required by check().
     * @param jobId - Optional job identifier forwarded in the payload.
     */
    constructor(callback, jobId) {
        this.callback = callback;
        this.jobId = jobId;
    }
    /** Precondition: one error issue when no callback URL is configured. */
    check() {
        if (this.callback.url) {
            return [];
        }
        return [
            {
                message: "Callback URL is required",
                severity: "error",
                source: "callback-delivery",
            },
        ];
    }
    /**
     * Read the score summary from disk and deliver it to the callback URL.
     *
     * @param ctx - Application context; reads ctx.config.rootDir, logs via ctx.logger.
     * @param state - Pipeline state; state.reportId is included in the payload.
     * @returns "success" with the attempt count, or "failed" when the summary
     *   cannot be read or delivery is not acknowledged.
     */
    async execute(ctx, state) {
        const startedAt = Date.now();
        const { rootDir } = ctx.config;
        // Load the score summary that becomes the callback body.
        let summary;
        try {
            const raw = readFileSync(resolve(rootDir, "results", "latest", "score-summary.json"), "utf-8");
            summary = JSON.parse(raw);
        }
        catch (err) {
            return {
                durationMs: Date.now() - startedAt,
                error: `Failed to read score-summary.json for callback: ${err instanceof Error ? err.message : String(err)}`,
                status: "failed",
            };
        }
        ctx.logger.info(`Delivering results to ${this.callback.url}`);
        const payload = {
            deliveredAt: new Date().toISOString(),
            jobId: this.jobId,
            reportId: state.reportId,
            summary,
        };
        const delivery = await deliverCallback(this.callback, payload);
        if (!delivery.ok) {
            // Not critical — warn and report the step as failed.
            ctx.logger.warn(`Callback delivery failed after ${delivery.attempts} attempts: ${delivery.error}`);
            return {
                durationMs: Date.now() - startedAt,
                error: `Callback delivery failed: ${delivery.error}`,
                status: "failed",
            };
        }
        const plural = delivery.attempts === 1 ? "" : "s";
        return {
            durationMs: Date.now() - startedAt,
            status: "success",
            summary: `Callback delivered to ${this.callback.url} (${delivery.attempts} attempt${plural})`,
        };
    }
}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Pipeline step: Compare against baseline scores.
3
+ *
4
+ * This step is already pure (no execSync, no env vars) — the logic is
5
+ * inlined directly from the former pipeline/steps/compare-step.ts.
6
+ * This is an optional step — failure doesn't stop the pipeline.
7
+ */
8
+ import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
9
/**
 * Optional pipeline step that compares the current score summary against
 * a baseline and writes results/latest/comparison-report.json.
 */
+ export declare class CompareStep implements PipelineStep {
10
/** Step identifier. */
+ readonly name = "compare";
11
/** Always true: a failed comparison does not abort the pipeline. */
+ readonly optional = true;
12
/** Step-level precondition hook; the implementation always returns an empty list. */
+ check(): ValidationIssue[];
13
/**
 * Uses ctx.config.compareBaseline when set, otherwise the last *.json file
 * (by sorted name) in results/baselines. Resolves to "skipped" when no
 * baselines directory or baseline files exist, and to "failed" when
 * score-summary.json or the resolved baseline file is missing.
 */
+ execute(ctx: AppContext): Promise<StepResult>;
14
+ }
@@ -0,0 +1,92 @@
1
+ /**
2
+ * Pipeline step: Compare against baseline scores.
3
+ *
4
+ * This step is already pure (no execSync, no env vars) — the logic is
5
+ * inlined directly from the former pipeline/steps/compare-step.ts.
6
+ * This is an optional step — failure doesn't stop the pipeline.
7
+ */
8
+ import { existsSync, readFileSync, readdirSync, writeFileSync } from "fs";
9
+ import { join, resolve } from "path";
10
+ import { compare } from "../../pipeline/compare.js";
/**
 * Optional pipeline step that compares the current run's score summary
 * against a baseline and writes results/latest/comparison-report.json.
 *
 * Baseline resolution: ctx.config.compareBaseline when provided, otherwise
 * the last *.json file (by sorted file name) in results/baselines.
 */
export class CompareStep {
    name = "compare";
    optional = true;
    /** No step-level preconditions; file checks happen in execute(). */
    check() {
        return [];
    }
    /**
     * Run the comparison.
     *
     * @param ctx - Application context; reads ctx.config.rootDir,
     *   ctx.config.compareBaseline and ctx.config.compareThreshold.
     * @returns "success" with a one-line delta summary; "skipped" when no
     *   baseline is available; "failed" when score-summary.json or the
     *   resolved baseline file does not exist. JSON parse errors propagate
     *   as exceptions.
     */
    async execute(ctx) {
        const start = Date.now();
        const { rootDir } = ctx.config;
        const scoreSummaryPath = resolve(rootDir, "results", "latest", "score-summary.json");
        if (!existsSync(scoreSummaryPath)) {
            return {
                durationMs: Date.now() - start,
                error: "score-summary.json not found. Run calculate-scores first.",
                status: "failed",
            };
        }
        // Load experiment (current run)
        const experiment = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
        // Resolve baseline
        let resolvedBaselinePath;
        if (ctx.config.compareBaseline) {
            resolvedBaselinePath = resolve(ctx.config.compareBaseline);
        }
        else {
            const baselinesDir = resolve(rootDir, "results", "baselines");
            if (!existsSync(baselinesDir)) {
                return {
                    reason: "No baselines directory found. Run 'pnpm baseline:save' first.",
                    status: "skipped",
                };
            }
            // Sorted descending, so the first entry is the last file by name.
            const files = readdirSync(baselinesDir)
                .filter((f) => f.endsWith(".json"))
                .sort()
                .reverse();
            if (files.length === 0) {
                return {
                    reason: "No baseline files found. Run 'pnpm baseline:save' first.",
                    status: "skipped",
                };
            }
            resolvedBaselinePath = join(baselinesDir, files[0]);
        }
        if (!existsSync(resolvedBaselinePath)) {
            return {
                durationMs: Date.now() - start,
                error: `Baseline file not found: ${resolvedBaselinePath}`,
                status: "failed",
            };
        }
        const baseline = JSON.parse(readFileSync(resolvedBaselinePath, "utf-8"));
        // Run comparison. Test for presence rather than truthiness so an
        // explicitly configured threshold of 0 is forwarded instead of being
        // silently replaced by compare()'s default.
        const threshold = ctx.config.compareThreshold;
        const options = threshold !== undefined && threshold !== null
            ? { noiseThreshold: threshold }
            : undefined;
        const report = compare(baseline, experiment, options);
        // Write report
        const reportPath = resolve(rootDir, "results", "latest", "comparison-report.json");
        writeFileSync(reportPath, JSON.stringify(report, null, 2));
        // Build summary
        const improved = report.improved.length;
        const regressed = report.regressed.length;
        const unchanged = report.unchanged.length;
        const overallDelta = report.deltas.overall;
        const deltaStr = overallDelta > 0
            ? `+${Math.round(overallDelta)}`
            : String(Math.round(overallDelta));
        const parts = [`Overall: ${deltaStr}`];
        if (improved > 0)
            parts.push(`${improved} improved`);
        if (regressed > 0)
            parts.push(`${regressed} regressed`);
        if (unchanged > 0)
            parts.push(`${unchanged} unchanged`);
        return {
            durationMs: Date.now() - start,
            status: "success",
            summary: parts.join(", "),
        };
    }
}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Pipeline step: Discovery report (agent discoverability analysis).
3
+ *
4
+ * Calls pure functions from pipeline/discovery-report.ts directly.
5
+ * Optional step — failure doesn't stop the pipeline.
6
+ */
7
+ import type { AppContext, PipelineStep, StepResult, ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
8
/**
 * Optional pipeline step that builds the agent-discoverability report from
 * the score summary and writes results/latest/discovery-report.md.
 */
+ export declare class DiscoveryReportStep implements PipelineStep {
9
/** Step identifier. */
+ readonly name = "discovery-report";
10
/** Always true: a failure of this step does not abort the pipeline. */
+ readonly optional = true;
11
/** Step-level precondition hook; the implementation always returns an empty list. */
+ check(): ValidationIssue[];
12
/**
 * Resolves to "failed" when score-summary.json is missing or report
 * generation throws, to "skipped" when the summary has no
 * retrievalMetrics, and to "success" after writing and printing the
 * markdown report.
 */
+ execute(ctx: AppContext): Promise<StepResult>;
13
+ }
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Pipeline step: Discovery report (agent discoverability analysis).
3
+ *
4
+ * Calls pure functions from pipeline/discovery-report.ts directly.
5
+ * Optional step — failure doesn't stop the pipeline.
6
+ */
7
+ import { existsSync, readFileSync, writeFileSync } from "fs";
8
+ import { resolve } from "path";
9
+ import { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../../pipeline/discovery-report.js";
/**
 * Optional pipeline step that produces the agent-discoverability report.
 *
 * Reads results/latest/score-summary.json, generates the report, writes it
 * as results/latest/discovery-report.md and echoes the markdown to stdout.
 */
export class DiscoveryReportStep {
    name = "discovery-report";
    optional = true;
    /** No step-level preconditions. */
    check() {
        return [];
    }
    /**
     * Generate and persist the discovery report.
     *
     * @param ctx - Application context; reads ctx.config.rootDir and ctx.config.areas.
     * @returns "success" with headline numbers, "skipped" when the score
     *   summary has no retrieval metrics, or "failed" when the summary is
     *   missing or anything in the process throws.
     */
    async execute(ctx) {
        const root = ctx.config.rootDir;
        const start = Date.now();
        // Render "<n> <label>" with a trailing "s" unless n is exactly 1.
        const counted = (n, label) => `${n} ${label}${n === 1 ? "" : "s"}`;
        try {
            const latest = (fileName) => resolve(root, "results", "latest", fileName);
            const summaryFile = latest("score-summary.json");
            if (!existsSync(summaryFile)) {
                return {
                    durationMs: Date.now() - start,
                    error: "score-summary.json not found",
                    status: "failed",
                };
            }
            const scoreSummary = JSON.parse(readFileSync(summaryFile, "utf-8"));
            if (!scoreSummary.retrievalMetrics) {
                return {
                    status: "skipped",
                    reason: "No retrieval metrics in score summary — run an agentic evaluation first",
                };
            }
            const report = generateDiscoveryReport(scoreSummary, ctx.config.areas);
            const markdown = formatDiscoveryMarkdown(report);
            writeFileSync(latest("discovery-report.md"), markdown);
            console.log(markdown);
            const invisibleCount = report.invisibleDocs.length;
            const avgF1 = report.overall.avgF1.toFixed(2);
            const recommendationCount = report.recommendations.length;
            return {
                durationMs: Date.now() - start,
                status: "success",
                summary: `F1=${avgF1}, ${counted(invisibleCount, "invisible doc")}, ${counted(recommendationCount, "recommendation")}`,
            };
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            return {
                durationMs: Date.now() - start,
                error: message,
                status: "failed",
            };
        }
    }
}
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Shell delegation for the fetch-docs step.
3
+ *
4
+ * Isolates the execSync call so it can be replaced when the pipeline
5
+ * fully migrates to the DocFetcher port.
6
+ */
7
/** Outcome of a shell delegation, returned instead of throwing. */
+ export interface ShellResult {
8
/** Presumably true when the shell command succeeded — implementation not visible here; confirm. */
+ ok: boolean;
9
/** Optional failure description; presumably set only when `ok` is false — confirm against the implementation. */
+ error?: string;
10
+ }
11
+ /**
12
+ * Run `pnpm fetch-docs` via shell.
13
+ *
14
+ * Returns a result object instead of throwing so the step can
15
+ * handle the failure uniformly.
+ *
+ * @param rootDir - Project root directory; presumably the working
+ *   directory for the command (implementation not visible here — confirm).
+ * @param source - Optional source name; presumably selects which
+ *   documentation source to fetch — confirm against the implementation.
+ * @returns A ShellResult describing whether the command succeeded.
16
+ */
17
+ export declare function runFetchDocsShell(rootDir: string, source?: string): ShellResult;