@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530)
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Port: How evaluation caching works.
3
+ *
4
+ * The CacheStore abstracts pipeline step caching behind a uniform
5
+ * interface. The StepRunner uses it for automatic cache lookup/record
6
+ * so individual steps don't need to manage their own caching.
7
+ *
8
+ * Adapters:
9
+ * - FilesystemCache — local results/cache/ directory (current behavior)
10
+ * - ContentLakeCacheAdapter — decorator that adds remote fingerprint
11
+ * lookup on local cache miss
12
+ * - NoOpCache (testing) — always misses
13
+ *
14
+ * @see packages/eval/src/pipeline/cache.ts — existing implementation
15
+ * @see docs/design-docs/content-lake-eval-caching.md
16
+ */
17
+ /**
18
+ * Opaque cache key — a content hash of the step's input files, as returned by CacheStore.computeKey().
19
+ * Branded via an intersection with a `__brand` marker property so the type checker rejects plain strings.
20
+ */
21
+ export type CacheKey = string & {
22
+ readonly __brand: "CacheKey";
23
+ };
24
+ /**
25
+ * Stored metadata for a cache entry; this is the `entry` carried by a CacheLookupResult hit.
26
+ * Kept minimal — just enough for cache status reporting (same fields as CacheRecordInput plus `timestamp`).
27
+ */
28
+ export interface CacheEntryMetadata {
29
+ /** Duration of the original (cached) execution, in milliseconds */
30
+ durationMs: number;
31
+ /** Paths of the output files the step produced (relative to package root) */
32
+ outputPaths: string[];
33
+ /** Human-readable summary of what was cached */
34
+ summary: string;
35
+ /** When this entry was created, as an ISO 8601 string */
36
+ timestamp: string;
37
+ }
38
+ /** Result of a cache lookup: a union discriminated on `hit`; `entry` is present only when `hit` is true. */
39
+ export type CacheLookupResult = {
40
+ hit: true;
41
+ entry: CacheEntryMetadata;
42
+ } | {
43
+ hit: false;
44
+ };
45
+ /** Metadata passed to CacheStore.record() after a successful step execution; unlike CacheEntryMetadata it has no `timestamp` field. */
46
+ export interface CacheRecordInput {
47
+ /** Duration of the execution being recorded, in milliseconds */
48
+ durationMs: number;
49
+ /** Paths of the output files the step produced (relative to package root) */
50
+ outputPaths: string[];
51
+ /** Human-readable summary of what is being cached */
52
+ summary: string;
53
+ }
54
+ /**
55
+ * Port: Pipeline step caching. All methods are asynchronous and return promises.
56
+ *
57
+ * The StepRunner calls these methods around step execution, in this order:
58
+ * 1. computeKey(step.cacheInputs(ctx)) → CacheKey
59
+ * 2. lookup(stepName, key) → hit (with entry metadata) or miss
60
+ * 3. (on miss) execute step
61
+ * 4. (on success) record(stepName, key, metadata)
62
+ */
63
+ export interface CacheStore {
64
+ /** Compute a cache key from input file paths and optional context strings; resolves to a branded CacheKey */
65
+ computeKey(inputPaths: string[], context?: string[]): Promise<CacheKey>;
66
+ /** Look up a cache entry by step name and key; resolves to `{ hit: true, entry }` or `{ hit: false }` */
67
+ lookup(step: string, key: CacheKey): Promise<CacheLookupResult>;
68
+ /** Record a cache entry for the given step name and key after successful step execution */
69
+ record(step: string, key: CacheKey, metadata: CacheRecordInput): Promise<void>;
70
+ /** Clear all cache entries */
71
+ clear(): Promise<void>;
72
+ }
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Port: How evaluation caching works.
3
+ *
4
+ * The CacheStore abstracts pipeline step caching behind a uniform
5
+ * interface. The StepRunner uses it for automatic cache lookup/record
6
+ * so individual steps don't need to manage their own caching.
7
+ *
8
+ * Adapters:
9
+ * - FilesystemCache — local results/cache/ directory (current behavior)
10
+ * - ContentLakeCacheAdapter — decorator that adds remote fingerprint
11
+ * lookup on local cache miss
12
+ * - NoOpCache (testing) — always misses
13
+ *
14
+ * @see packages/eval/src/pipeline/cache.ts — existing implementation
15
+ * @see docs/design-docs/content-lake-eval-caching.md
16
+ */
17
+ export {};
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Port: Where pipeline configuration comes from.
3
+ *
4
+ * The ConfigSource abstracts configuration resolution so the pipeline
5
+ * can be driven from different sources without changing orchestration.
6
+ *
7
+ * Adapters:
8
+ * - CliConfigAdapter — Commander flags + env vars (current behavior)
9
+ * - FileConfigAdapter — reads a JSON/YAML config file (--config flag)
10
+ * - ContentLakeConfigAdapter (future: studio-eval-config) — GROQ query
11
+ *
12
+ * @see packages/eval/src/commands/pipeline-action.ts — current implementation
13
+ * @see docs/ideas/studio-eval-config.md — future Content Lake adapter
14
+ */
15
+ import type { ResolvedConfig } from "./context.js";
16
+ /**
17
+ * Port: Resolve pipeline configuration from any source into the
18
+ * canonical ResolvedConfig shape.
19
+ *
20
+ * The adapter handles parsing, validation (via Zod), and defaults.
21
+ * The returned config is fully resolved — no env var lookups needed
22
+ * downstream.
23
+ */
24
+ export interface ConfigSource {
25
+ /** Human-readable label for diagnostics (e.g., "CLI flags", "config.json"); read-only */
26
+ readonly label: string;
27
+ /**
28
+ * Resolve configuration into the canonical ResolvedConfig shape.
+ * Takes no arguments and returns a Promise of the resolved config.
29
+ *
30
+ * @throws on validation failure (invalid config file, missing fields)
+ * NOTE(review): because the method returns a Promise, failures presumably
+ * surface as a rejected promise — confirm against the adapters.
31
+ */
32
+ resolve(): Promise<ResolvedConfig>;
33
+ }
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Port: Where pipeline configuration comes from.
3
+ *
4
+ * The ConfigSource abstracts configuration resolution so the pipeline
5
+ * can be driven from different sources without changing orchestration.
6
+ *
7
+ * Adapters:
8
+ * - CliConfigAdapter — Commander flags + env vars (current behavior)
9
+ * - FileConfigAdapter — reads a JSON/YAML config file (--config flag)
10
+ * - ContentLakeConfigAdapter (future: studio-eval-config) — GROQ query
11
+ *
12
+ * @see packages/eval/src/commands/pipeline-action.ts — current implementation
13
+ * @see docs/ideas/studio-eval-config.md — future Content Lake adapter
14
+ */
15
+ export {};
@@ -0,0 +1,172 @@
1
+ /**
2
+ * Application context — the dependency carrier for all pipeline operations.
3
+ *
4
+ * Passed to every pipeline step, domain service, and adapter that needs
5
+ * access to infrastructure. Replaces process.env as the state-passing
6
+ * mechanism.
7
+ *
8
+ * Created once per CLI invocation by createAppContext() (composition root).
9
+ * Created per-test by createTestContext().
10
+ *
11
+ * Fields marked optional are transitional — they will become required
12
+ * as downstream consumers are converted to use them.
13
+ */
14
+ import type { DebugOptions, EvalMode } from "../types/index.js";
15
+ import type { CacheStore } from "./cache-store.js";
16
+ import type { DocFetcher } from "./doc-fetcher.js";
17
+ import type { EvalRunner } from "./eval-runner.js";
18
+ import type { Logger } from "./logger.js";
19
+ import type { TaskSource } from "./task-source.js";
20
+ /**
21
+ * Resolved pipeline configuration — the typed, validated result of
22
+ * merging CLI flags, env vars, config files, and defaults.
23
+ *
24
+ * This replaces the ~25 process.env variables that applyEnvironment()
25
+ * currently sets. Fields are added incrementally as steps are converted.
+ *
+ * Required fields: `rootDir`, `mode`, `searchMode`, and every boolean
+ * toggle (skip*, *Enabled, noCache, noRemoteCache). All other fields
+ * are optional.
26
+ */
27
+ export interface ResolvedConfig {
28
+ /** Eval package root directory */
29
+ rootDir: string;
30
+ /** Evaluation mode */
31
+ mode: EvalMode;
32
+ /** Debug options */
33
+ debug?: DebugOptions;
34
+ /** Feature area filter */
35
+ areas?: string[];
36
+ /** Task ID filter */
37
+ tasks?: string[];
38
+ /** Changed doc slugs for impact scoping */
39
+ changedDocs?: string[];
40
+ /** Documentation source name */
41
+ source?: string;
42
+ /** Whether to skip the fetch-docs step */
43
+ skipFetch: boolean;
44
+ /** Whether to skip the eval step */
45
+ skipEval: boolean;
46
+ /** Whether comparison is enabled */
47
+ compareEnabled: boolean;
48
+ /** Comparison noise threshold */
49
+ compareThreshold?: number;
50
+ /** Comparison baseline path */
51
+ compareBaseline?: string;
52
+ /** Whether gap analysis is enabled */
53
+ gapAnalysisEnabled: boolean;
54
+ /** Whether readiness report is enabled */
55
+ readinessEnabled: boolean;
56
+ /** Whether discovery report is enabled */
57
+ discoveryReportEnabled: boolean;
58
+ /** Whether publishing is enabled */
59
+ publishEnabled: boolean;
60
+ /** Publish tag */
61
+ publishTag?: string;
62
+ /** Whether to disable local cache */
63
+ noCache: boolean;
64
+ /** Whether to disable remote (Content Lake) cache */
65
+ noRemoteCache: boolean;
66
+ /** Grader replications for consistency measurement */
67
+ graderReplications?: number;
68
+ /** Output path override */
69
+ outputPath?: string;
70
+ /** Doc source URL overrides */
71
+ urls?: string[];
72
+ /** Custom headers for doc fetching (header name → value) */
73
+ headers?: Record<string, string>;
74
+ /** Allowed origins for agentic mode */
75
+ allowedOrigins?: string[];
76
+ /** Search mode for agentic mode: "off", "open", or "origin-only" */
77
+ searchMode: "off" | "open" | "origin-only";
78
+ /** Eval concurrency */
79
+ concurrency?: number;
80
+ /** Promptfoo URL from eval output */
81
+ promptfooUrl?: string;
82
+ /** Sanity dataset override */
83
+ datasetOverride?: string;
84
+ /** Sanity project ID override */
85
+ projectIdOverride?: string;
86
+ /** Sanity perspective override */
87
+ perspectiveOverride?: string;
88
+ /** Sanity studio origin override */
89
+ studioOriginOverride?: string;
90
+ /** Sanity document filter args */
91
+ sanityDocumentArgs?: string[];
92
+ /** Before option for comparison */
93
+ beforeOption?: string;
94
+ /** Task source adapter selection ("content-lake" or "yaml") */
95
+ taskSourceType?: "content-lake" | "yaml";
96
+ /** Path to repo-based tasks directory (e.g., .ailf/tasks/) */
97
+ repoTasksPath?: string;
98
+ /** Callback URL configuration for API-triggered evaluations: a required `url` plus optional `headers` */
99
+ callback?: {
100
+ url: string;
101
+ headers?: Record<string, string>;
102
+ };
103
+ /** Job ID for tracking API-triggered evaluations in the Content Lake */
104
+ jobId?: string;
105
+ }
106
+ /**
107
+ * Application context — the complete dependency carrier.
108
+ *
109
+ * Every pipeline step, domain service, and adapter receives this
110
+ * as its first parameter. It replaces:
111
+ * - process.env (environment-based state passing)
112
+ * - Module-level singletons (implicit global state)
113
+ * - Direct constructor calls (ad-hoc dependency creation)
114
+ *
115
+ * Created once per CLI invocation by createAppContext().
116
+ * Created per-test by createTestContext().
+ *
+ * All properties are readonly. `config`, `evalRunner`, `logger`, and
+ * `taskSource` are required; `cache`, `docFetcher`, `reportStore`, and
+ * `sinks` are optional.
117
+ */
118
+ export interface AppContext {
119
+ /** Evaluation caching (filesystem + optional Content Lake fallback); optional */
120
+ readonly cache?: CacheStore;
121
+ /** Resolved pipeline configuration */
122
+ readonly config: ResolvedConfig;
123
+ /** Documentation context fetcher; optional */
124
+ readonly docFetcher?: DocFetcher;
125
+ /** LLM evaluation runner (Promptfoo adapter) */
126
+ readonly evalRunner: EvalRunner;
127
+ /** Structured logger */
128
+ readonly logger: Logger;
129
+ /**
130
+ * Persistent report store (Sanity Content Lake).
131
+ * Optional — not all commands need it. Commands that publish or
132
+ * query reports (publish, weekly-digest, pipeline with --publish)
133
+ * require this field.
134
+ */
135
+ readonly reportStore?: ReportStorePort;
136
+ /**
137
+ * Report delivery sinks (Slack, BigQuery, webhooks).
138
+ * Empty array when no sinks are configured.
+ * NOTE(review): the property is also declared optional, so the type
+ * permits `undefined` as well as an empty array — confirm which form
+ * consumers should expect.
139
+ */
140
+ readonly sinks?: ReportSinkPort[];
141
+ /** Task definition source (YAML, Content Lake, repo) */
142
+ readonly taskSource: TaskSource;
143
+ }
144
+ /**
145
+ * Minimal report store interface used by AppContext.
146
+ *
147
+ * The concrete ReportStore class in packages/eval implements this.
148
+ * Defined as a structural interface so the domain kernel has no
149
+ * import dependency on the concrete class.
+ *
+ * Every parameter and resolved value is typed `unknown`, so callers
+ * must narrow before use. Note that `null | unknown` is the same type
+ * as `unknown` to the compiler; the `null` in the find* signatures
+ * only documents intent (presumably "nothing found" — confirm against
+ * the concrete class).
150
+ */
151
+ export interface ReportStorePort {
152
+ /** Auto-compare against the most recent comparable baseline */
153
+ autoCompare(currentSummary: unknown, provenance: unknown, completedAt: unknown): Promise<unknown>;
154
+ /** Find a report by its eval fingerprint (for cache lookup) */
155
+ findByFingerprint(fingerprint: string): Promise<null | unknown>;
156
+ /** Find the most recent comparable baseline for auto-comparison */
157
+ findComparableBaseline(query: unknown): Promise<null | unknown>;
158
+ /** Write a report to the store */
159
+ write(report: unknown): Promise<unknown>;
160
+ }
161
+ /**
162
+ * Minimal report sink interface used by AppContext.
163
+ *
164
+ * The concrete ReportSink type in packages/eval/src/sinks/ implements this.
165
+ * Defined structurally to avoid coupling the domain kernel to sink implementations.
166
+ */
167
+ export interface ReportSinkPort {
168
+ /** Human-readable name for logging; read-only */
169
+ readonly name: string;
170
+ /** Deliver a report to this sink; the report and the resolved value are both typed `unknown` */
171
+ publish(report: unknown): Promise<unknown>;
172
+ }
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Application context — the dependency carrier for all pipeline operations.
3
+ *
4
+ * Passed to every pipeline step, domain service, and adapter that needs
5
+ * access to infrastructure. Replaces process.env as the state-passing
6
+ * mechanism.
7
+ *
8
+ * Created once per CLI invocation by createAppContext() (composition root).
9
+ * Created per-test by createTestContext().
10
+ *
11
+ * Fields marked optional are transitional — they will become required
12
+ * as downstream consumers are converted to use them.
13
+ */
14
+ export {};
@@ -0,0 +1,131 @@
1
+ /**
2
+ * Port: Where documentation context comes from.
3
+ *
4
+ * Adapters:
5
+ * - SanityDocFetcher — GROQ queries against Sanity Content Lake
6
+ * (perspective diffing, document overlays, URL fetching)
7
+ * - InMemoryDocFetcher (testing) — returns pre-loaded content
8
+ *
9
+ * The pipeline orchestrator and all downstream steps work with
10
+ * FetchResult regardless of where the documentation came from.
11
+ */
12
+ import type { TaskDefinition } from "./task-source.js";
13
+ /**
14
+ * A fetched documentation context ready for injection into prompts.
15
+ *
16
+ * Each DocContext corresponds to one task's documentation — the combined
17
+ * markdown of all canonical docs for that task plus any overlays/URLs.
18
+ */
19
+ export interface DocContext {
20
+ /** The task ID this context belongs to */
21
+ taskId: string;
22
+ /** Combined markdown content of the fetched documentation */
23
+ content: string;
24
+ /** Document slugs included in this context */
25
+ slugs: string[];
26
+ /** Approximate token count (length / 4 — presumably of `content`; confirm); optional */
27
+ tokenCount?: number;
28
+ }
29
+ /**
30
+ * Configuration for a documentation source.
31
+ *
32
+ * Maps to the resolved source config from config/sources.yaml, but
33
+ * expressed as a domain type independent of the YAML structure.
+ * Every field is optional.
34
+ */
35
+ export interface DocSourceConfig {
36
+ /** Base URL (e.g., "https://www.sanity.io/docs") */
37
+ baseUrl?: string;
38
+ /** Sanity dataset (e.g., "production") */
39
+ dataset?: string;
40
+ /** Sanity project ID */
41
+ projectId?: string;
42
+ /** Custom headers for authenticated requests (header name → value) */
43
+ headers?: Record<string, string>;
44
+ /** Content release perspective (e.g., bundle name) */
45
+ perspective?: string;
46
+ /** Direct URLs to fetch and include in context */
47
+ urls?: string[];
48
+ /** Sanity document IDs for overlay (replace/append to canonical docs) */
49
+ documentIds?: string[];
50
+ }
51
+ /** Metadata about a fetched document, for traceability: its `_id`, `_rev`, `slug`, and `title` (all required strings) */
52
+ export interface DocumentManifestEntry {
53
+ _id: string;
54
+ _rev: string;
55
+ slug: string;
56
+ title: string;
57
+ }
58
+ /** Impact of a content release on canonical documents, as four string lists (NOTE(review): entries are presumably document slugs — confirm) */
59
+ export interface ReleaseImpact {
60
+ added: string[];
61
+ modified: string[];
62
+ removed: string[];
63
+ unchanged: string[];
64
+ }
65
+ /** Summary of document overlay operations: an appended count, the document IDs involved, and the slugs that were replaced */
66
+ export interface DocumentOverlaySummary {
67
+ appendedCount: number;
68
+ documentIds: string[];
69
+ replacedSlugs: string[];
70
+ }
71
+ /** Metadata for a single URL fetch result; only `method` and `url` are always present, the other fields are optional */
72
+ export interface UrlFetchEntry {
73
+ contentLength?: number;
74
+ error?: string;
75
+ method: string;
76
+ status?: number;
77
+ url: string;
78
+ }
79
+ /** Summary of URL fetch operations: per-URL entries, a list of failures (url plus optional error), and total counts */
80
+ export interface UrlFetchSummary {
81
+ failures: {
82
+ error?: string;
83
+ url: string;
84
+ }[];
85
+ fetchedUrls: UrlFetchEntry[];
86
+ totalFailed: number;
87
+ totalFetched: number;
88
+ }
89
+ /** Metadata about the fetch operation, for downstream pipeline consumption; every section is optional */
90
+ export interface FetchMetadata {
91
+ /** Document manifest for traceability (slug, _id, _rev, title) */
92
+ manifest?: DocumentManifestEntry[];
93
+ /** Release impact from perspective diffing */
94
+ releaseImpact?: ReleaseImpact;
95
+ /** Document overlay operations summary */
96
+ documentOverlay?: DocumentOverlaySummary;
97
+ /** URL fetch operations summary */
98
+ urlFetch?: UrlFetchSummary;
99
+ }
100
+ /** Complete result of a doc fetch operation: required per-task `contexts` plus optional `metadata` */
101
+ export interface FetchResult {
102
+ /** Per-task documentation contexts */
103
+ contexts: DocContext[];
104
+ /** Optional metadata about the fetch (manifests, impact, overlays, URLs) */
105
+ metadata?: FetchMetadata;
106
+ }
107
+ /**
108
+ * Port: Where documentation context comes from.
109
+ *
110
+ * The pipeline never knows HOW docs are fetched — it only sees
111
+ * FetchResult. The adapter handles GROQ queries, Portable Text
112
+ * conversion, URL fetching, perspective diffing, document overlays, etc.
113
+ */
114
+ export interface DocFetcher {
115
+ /**
116
+ * Fetch documentation context for a set of tasks.
117
+ *
118
+ * The adapter:
119
+ * 1. Collects unique slugs from tasks' canonicalDocs
120
+ * 2. Fetches documents from the source (with perspective/overlay support)
121
+ * 3. Converts to markdown
122
+ * 4. Assembles per-task context (combining all canonical docs + overlays)
123
+ * 5. Writes context files to disk (for Promptfoo consumption)
124
+ * 6. Returns structured metadata (manifest, release impact, etc.)
125
+ *
126
+ * @param tasks — Task definitions with canonical doc references
127
+ * @param source — (optional) Where to fetch documentation from
128
+ * @returns Promise of the fetched doc contexts + optional metadata
129
+ */
130
+ fetch(tasks: TaskDefinition[], source?: DocSourceConfig): Promise<FetchResult>;
131
+ }
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Port: Where documentation context comes from.
3
+ *
4
+ * Adapters:
5
+ * - SanityDocFetcher — GROQ queries against Sanity Content Lake
6
+ * (perspective diffing, document overlays, URL fetching)
7
+ * - InMemoryDocFetcher (testing) — returns pre-loaded content
8
+ *
9
+ * The pipeline orchestrator and all downstream steps work with
10
+ * FetchResult regardless of where the documentation came from.
11
+ */
12
+ export {};
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Port: How LLM evaluation is executed.
3
+ *
4
+ * Abstracts the eval engine (Promptfoo today, potentially something else
5
+ * in the future). The domain kernel never mentions Promptfoo — it works
6
+ * with evaluation results.
7
+ */
8
+ import type { StepResult } from "../types/index.js";
9
+ export interface EvalRunConfig {
10
+ /** Path to the promptfoo config file to execute (the only required field) */
11
+ configPath: string;
12
+ /** Debug filter flags (--filter-first-n, --filter-pattern, etc.), supplied as a single string */
13
+ filterFlags?: string;
14
+ /** Concurrency limit */
15
+ concurrency?: number;
16
+ /** Environment variables to pass to the eval process (name → value) */
17
+ env?: Record<string, string>;
18
+ }
19
+ export interface EvalRunner {
20
+ /** Run an evaluation with the given EvalRunConfig and resolve to the step result */
21
+ run(config: EvalRunConfig): Promise<StepResult>;
22
+ /** Extract the share URL from eval output (Promptfoo-specific); optional method, synchronous, returns a string or `undefined` */
23
+ extractShareUrl?(outputPath: string): string | undefined;
24
+ }
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Port: How LLM evaluation is executed.
3
+ *
4
+ * Abstracts the eval engine (Promptfoo today, potentially something else
5
+ * in the future). The domain kernel never mentions Promptfoo — it works
6
+ * with evaluation results.
7
+ */
8
+ export {};
@@ -0,0 +1,15 @@
1
+ /**
2
+ * @sanity/ailf-core — Port interfaces
3
+ *
4
+ * Ports define the contracts between the domain kernel and the outside world.
5
+ * Adapters (in packages/eval) implement these interfaces.
6
+ */
7
+ export type { CacheEntryMetadata, CacheKey, CacheLookupResult, CacheRecordInput, CacheStore, } from "./cache-store.js";
8
+ export type { ConfigSource } from "./config-source.js";
9
+ export type { AppContext, ReportSinkPort, ReportStorePort, ResolvedConfig, } from "./context.js";
10
+ export type { DocContext, DocFetcher, DocSourceConfig, DocumentManifestEntry, DocumentOverlaySummary, FetchMetadata, FetchResult, ReleaseImpact, UrlFetchEntry, UrlFetchSummary, } from "./doc-fetcher.js";
11
+ export type { EvalRunConfig, EvalRunner } from "./eval-runner.js";
12
+ export type { Logger } from "./logger.js";
13
+ export type { PipelineStep } from "./pipeline-step.js";
14
+ export type { AssertionDefinition, BaselineConfig, CanonicalDocRef, IdDocRef, PathDocRef, PerspectiveDocRef, SlugDocRef, TaskDefinition, TaskSource, TemplatedAssertion, ValueAssertion, } from "./task-source.js";
15
+ export { canonicalDocRefLabel, isIdRef, isPathRef, isPerspectiveRef, isSlugRef, isTemplatedAssertion, } from "./task-source.js";
@@ -0,0 +1,7 @@
1
+ /**
2
+ * @sanity/ailf-core — Port interfaces
3
+ *
4
+ * Ports define the contracts between the domain kernel and the outside world.
5
+ * Adapters (in packages/eval) implement these interfaces.
6
+ */
7
+ export { canonicalDocRefLabel, isIdRef, isPathRef, isPerspectiveRef, isSlugRef, isTemplatedAssertion, } from "./task-source.js";
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Port: Structured logging.
3
+ *
4
+ * All pipeline output flows through this interface. Adapters control
5
+ * formatting (emoji console, plain text, JSON, silent) without
6
+ * changing the calling code.
7
+ *
8
+ * This is the first port introduced in the Ports & Adapters migration.
9
+ * It establishes the pattern: interface in core, adapters in eval.
10
+ */
11
+ import type { StepResult } from "../types/index.js";
12
+ /**
13
+ * Logger port — the contract for all pipeline output.
14
+ *
15
+ * Implementations:
16
+ * - ConsoleLogger — emoji-prefix human output (default)
17
+ * - QuietLogger — errors and failures only
18
+ * - JsonLogger — NDJSON machine-readable output
19
+ * - SilentLogger — captures messages for test assertions
+ *
+ * The four level methods (debug/info/warn/error) share one signature:
+ * a message string plus an optional `ctx` key/value record. All
+ * methods return void.
20
+ */
21
+ export interface Logger {
22
+ /** Debug-level output (only shown with --verbose) */
23
+ debug(msg: string, ctx?: Record<string, unknown>): void;
24
+ /** Informational output (shown by default) */
25
+ info(msg: string, ctx?: Record<string, unknown>): void;
26
+ /** Warning output (always shown unless --quiet) */
27
+ warn(msg: string, ctx?: Record<string, unknown>): void;
28
+ /** Error output (always shown) */
29
+ error(msg: string, ctx?: Record<string, unknown>): void;
30
+ /** Pipeline step result (formatted with timing, icon, and summary) */
31
+ step(name: string, result: StepResult): void;
32
+ /** Section header (═══ formatted) */
33
+ section(title: string): void;
34
+ /** Table output (for score matrices, comparison tables, etc.): column headers plus rows of string cells */
35
+ table(headers: string[], rows: string[][]): void;
36
+ }
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Port: Structured logging.
3
+ *
4
+ * All pipeline output flows through this interface. Adapters control
5
+ * formatting (emoji console, plain text, JSON, silent) without
6
+ * changing the calling code.
7
+ *
8
+ * This is the first port introduced in the Ports & Adapters migration.
9
+ * It establishes the pattern: interface in core, adapters in eval.
10
+ */
11
+ export {};
@@ -0,0 +1,46 @@
1
+ /**
2
+ * Port: A single step in the evaluation pipeline.
3
+ *
4
+ * Steps are composable, independently testable, and uniformly executed
5
+ * by the StepRunner. Each step declares its preconditions, execution
6
+ * logic, and cache inputs.
7
+ */
8
+ import type { PipelineState, StepResult, ValidationIssue } from "../types/index.js";
9
+ import type { AppContext } from "./context.js";
10
+ export interface PipelineStep {
11
+ /** Human-readable step name for logging and diagnostics; read-only */
12
+ readonly name: string;
13
+ /**
14
+ * Precondition check — validates that required files, config, and
15
+ * environment are in place before execution.
+ * Synchronous: the issues array is returned directly, not as a Promise.
16
+ *
17
+ * @returns Array of validation issues (empty = all clear)
18
+ */
19
+ check(ctx: AppContext): ValidationIssue[];
20
+ /**
21
+ * Execute the step. May read from filesystem, call APIs, spawn
22
+ * processes, or delegate to domain services.
23
+ *
24
+ * @param ctx — Immutable application context (config, adapters, logger)
25
+ * @param state — Mutable state bag for inter-step data flow. Producer
26
+ * steps write values (e.g., `state.reportId`); consumer steps read
27
+ * them. Steps that don't participate in inter-step data flow can
28
+ * ignore this parameter.
29
+ *
30
+ * @returns Promise of the step result with status, duration, and summary/error
31
+ */
32
+ execute(ctx: AppContext, state: PipelineState): Promise<StepResult>;
33
+ /**
34
+ * Cache key inputs — file paths or content identifiers that determine
35
+ * whether this step's output is still valid.
36
+ *
37
+ * When undefined, the step is never cached (always executes).
38
+ * When defined, the StepRunner computes a hash and checks the cache.
+ * Optional method; synchronous, returns a string array.
39
+ */
40
+ cacheInputs?(ctx: AppContext): string[];
41
+ /**
42
+ * Whether this step is optional — a failure in an optional step
43
+ * does not stop the pipeline.
+ * The flag itself may be omitted.
44
+ */
45
+ readonly optional?: boolean;
46
+ }
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Port: A single step in the evaluation pipeline.
3
+ *
4
+ * Steps are composable, independently testable, and uniformly executed
5
+ * by the StepRunner. Each step declares its preconditions, execution
6
+ * logic, and cache inputs.
7
+ */
8
+ export {};