@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530) hide show
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,153 @@
1
+ /**
2
+ * Trigger processing for repo-based AILF tasks.
3
+ *
4
+ * Reads .ailf/config.yaml, determines the current execution context
5
+ * (PR, main merge, schedule, manual), and resolves which trigger
6
+ * config applies. This drives whether the pipeline runs in validate-only
7
+ * mode or full eval mode, and whether results are blocking.
8
+ *
9
+ * @see docs/exec-plans/completed/tasks-as-content/phase-4-repo-based-tasks.md
10
+ */
11
+ import { existsSync, readFileSync } from "fs";
12
+ import { resolve } from "path";
13
+ import { load } from "js-yaml";
14
+ import { parseRepoConfig } from "./repo-schemas.js";
15
+ // ---------------------------------------------------------------------------
16
+ // Public API
17
+ // ---------------------------------------------------------------------------
18
/**
 * Resolve which trigger configuration governs the current run.
 *
 * Looks for `.ailf/config.yaml` under `repoRoot`. When the file is absent the
 * built-in defaults for the context are returned; otherwise the YAML is
 * loaded, passed through `parseRepoConfig`, and matched against the context.
 *
 * @param repoRoot - Root of the external repo (contains .ailf/)
 * @param context - The current execution context (PR, main, schedule, manual)
 * @returns The resolved trigger config, or the default for the context
 */
export function resolveTrigger(repoRoot, context) {
    const configFile = resolve(repoRoot, ".ailf", "config.yaml");
    if (existsSync(configFile)) {
        const yamlText = readFileSync(configFile, "utf-8");
        const repoConfig = parseRepoConfig(load(yamlText));
        return matchTrigger(repoConfig, context);
    }
    // Without a config file there is nothing to match — use the defaults.
    return defaultTrigger(context);
}
36
/**
 * Detect the current trigger context from GitHub Actions environment variables.
 *
 * Reads `GITHUB_EVENT_NAME`, `GITHUB_REF`, and `GITHUB_HEAD_REF`:
 * - `schedule` event → `{ type: "schedule" }`
 * - `workflow_dispatch` event → `{ type: "manual" }`
 * - `pull_request` or `pull_request_target` event → `{ type: "pr", branch }`
 * - any other event on `refs/heads/main` or `refs/heads/master` → `{ type: "main" }`
 * - everything else → `{ type: "manual" }`
 *
 * @returns The detected context. `changedFiles` is never populated here;
 *   the caller can fill it in (e.g. from git).
 */
export function detectTriggerContext() {
    const eventName = process.env.GITHUB_EVENT_NAME;
    const ref = process.env.GITHUB_REF ?? "";
    if (eventName === "schedule") {
        return { type: "schedule" };
    }
    if (eventName === "workflow_dispatch") {
        return { type: "manual" };
    }
    if (eventName === "pull_request" || eventName === "pull_request_target") {
        // `||` rather than `??`: an empty GITHUB_HEAD_REF must also fall back
        // to the ref, otherwise the branch would be reported as "".
        const branch = process.env.GITHUB_HEAD_REF || ref.replace("refs/heads/", "");
        // Changed files could come from GITHUB_EVENT_PATH or git diff.
        // For now, return without changedFiles — the caller can populate from git.
        return { type: "pr", branch };
    }
    // Not an explicitly recognized event: treat pushes to main/master as "main".
    if (ref === "refs/heads/main" || ref === "refs/heads/master") {
        return { type: "main" };
    }
    return { type: "manual" };
}
63
+ // ---------------------------------------------------------------------------
64
+ // Internal matching
65
+ // ---------------------------------------------------------------------------
66
+ function matchTrigger(config, context) {
67
+ const triggers = config.triggers;
68
+ if (!triggers)
69
+ return defaultTrigger(context);
70
+ switch (context.type) {
71
+ case "pr": {
72
+ // Check pr-task-change first (more specific), then pr (general)
73
+ const prTaskChange = triggers["pr-task-change"];
74
+ if (prTaskChange &&
75
+ hasPathMatch(prTaskChange.paths, context.changedFiles)) {
76
+ return {
77
+ matched: true,
78
+ mode: prTaskChange.mode ?? "eval",
79
+ blocking: prTaskChange.blocking ?? false,
80
+ notify: prTaskChange.notify ?? true,
81
+ compare: prTaskChange.compare ?? false,
82
+ paths: prTaskChange.paths,
83
+ };
84
+ }
85
+ const pr = triggers.pr;
86
+ if (pr) {
87
+ return {
88
+ matched: true,
89
+ mode: pr.mode ?? "validate-only",
90
+ blocking: pr.blocking ?? false,
91
+ notify: pr.notify ?? true,
92
+ compare: pr.compare ?? false,
93
+ paths: pr.paths,
94
+ };
95
+ }
96
+ return defaultTrigger(context);
97
+ }
98
+ case "main": {
99
+ const main = triggers.main;
100
+ if (main) {
101
+ return {
102
+ matched: true,
103
+ mode: main.mode ?? "eval",
104
+ blocking: main.blocking ?? false,
105
+ notify: main.notify ?? true,
106
+ compare: main.compare ?? false,
107
+ };
108
+ }
109
+ return defaultTrigger(context);
110
+ }
111
+ case "schedule": {
112
+ const schedule = triggers.schedule;
113
+ if (schedule) {
114
+ return {
115
+ matched: true,
116
+ mode: schedule.mode ?? "eval",
117
+ blocking: false, // schedule is never blocking
118
+ notify: schedule.notify ?? true,
119
+ compare: schedule.compare ?? true,
120
+ cron: schedule.cron,
121
+ };
122
+ }
123
+ return defaultTrigger(context);
124
+ }
125
+ case "manual":
126
+ return defaultTrigger(context);
127
+ }
128
+ }
129
/**
 * Check if any changed file matches one of the trigger's path patterns.
 *
 * Patterns are a small glob subset, matched as prefixes anchored at the
 * start of the file path (not a full glob implementation):
 * - a pattern without wildcards matches every path that starts with it
 * - a single star matches any run of characters within one path segment
 * - a double star matches any run of characters, including slashes
 * - a double star followed by a slash also matches zero directories
 *
 * Wildcards in the middle of a pattern (e.g. "tasks/" + "*.yaml") are
 * honored; trailing wildcards behave as plain prefix matches.
 *
 * @param patterns - Path patterns from the trigger config (may be undefined)
 * @param changedFiles - Files changed in the current context (may be undefined)
 * @returns true if no patterns are specified (match-all) or any file matches;
 *   false if patterns exist but there are no changed files
 */
function hasPathMatch(patterns, changedFiles) {
    if (!patterns || patterns.length === 0)
        return true;
    if (!changedFiles || changedFiles.length === 0)
        return false;
    const toRegexSource = (part) => {
        switch (part) {
            case "**/":
                return "(?:.*/)?";
            case "**":
                return ".*";
            case "*":
                return "[^/]*";
            default:
                // Literal text: escape anything the regex engine would interpret.
                return part.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        }
    };
    // Compile each pattern once instead of once per (file, pattern) pair.
    // Only the start is anchored, which keeps the prefix-match semantics.
    const matchers = patterns.map((pattern) => {
        const source = pattern
            .split(/(\*\*\/|\*\*|\*)/)
            .map((part) => toRegexSource(part))
            .join("");
        return new RegExp(`^${source}`, "s");
    });
    return changedFiles.some((file) => matchers.some((matcher) => matcher.test(file)));
}
145
/**
 * Built-in trigger settings used when no configured trigger applies.
 *
 * PR runs default to validate-only; everything else runs a full eval.
 * Defaults are never blocking, notify for every context except manual runs,
 * and compare only on scheduled runs.
 *
 * @param context - Execution context; only `context.type` is read
 * @returns Trigger settings with `matched: false`
 */
function defaultTrigger(context) {
    const { type } = context;
    const mode = type === "pr" ? "validate-only" : "eval";
    const notify = type !== "manual";
    const compare = type === "schedule";
    return { matched: false, mode, blocking: false, notify, compare };
}
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Semantic validation for repo-based tasks.
3
+ *
4
+ * Checks that go beyond Zod schema parsing:
5
+ * - Assertion types are in the curated set
6
+ * - Rubric template names resolve to known templates
7
+ * - Feature area strings are well-formed
8
+ * - Canonical doc slugs look reasonable (slugs, not URLs)
9
+ *
10
+ * These produce warnings, not errors — the pipeline can still run
11
+ * with imperfect tasks. Only structural failures (caught by Zod) block.
12
+ *
13
+ * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
14
+ */
15
+ import { type RepoTask } from "./repo-schemas.js";
16
/** Aggregate outcome of semantic validation over a set of repo tasks. */
export interface ValidationResult {
    /** True when no errors were recorded; warnings do not affect it. */
    valid: boolean;
    /** Findings reported as errors. */
    errors: Array<ValidationMessage>;
    /** Findings reported as warnings. */
    warnings: Array<ValidationMessage>;
}
21
/** A single validation finding, attributed to one task and one field. */
export interface ValidationMessage {
    /** ID of the task the finding belongs to. */
    taskId: string;
    /** Path of the offending field within the task (e.g. `assert[0].type`). */
    field: string;
    /** Human-readable description of the finding. */
    message: string;
}
26
/**
 * Semantically validate a list of already-parsed repo tasks.
 *
 * Non-blocking issues (unknown feature areas, unresolved slugs) come back as
 * warnings; issues that would cause pipeline failures come back as errors
 * (Zod catches most of those before this runs).
 *
 * @param tasks - Parsed repo tasks to check
 * @returns The collected errors and warnings, plus an overall `valid` flag
 */
export declare function validateRepoTasks(tasks: Array<RepoTask>): ValidationResult;
34
/**
 * Render a validation result as text for console output.
 *
 * @param result - Result produced by `validateRepoTasks`
 * @returns The formatted, printable report
 */
export declare function formatValidationResult(result: ValidationResult): string;
38
/**
 * Find snake_case field names in raw task YAML data.
 *
 * Intended to run BEFORE Zod parsing so authors who used the
 * framework-internal snake_case names get a friendly hint pointing at the
 * camelCase names expected in repo task files.
 *
 * @param raw - Raw parsed YAML (before Zod validation)
 * @param filename - Source filename, used in the messages
 * @returns Warning messages; empty when nothing was found
 */
export declare function detectSnakeCaseFields(raw: unknown, filename: string): Array<string>;
@@ -0,0 +1,164 @@
1
+ /**
2
+ * Semantic validation for repo-based tasks.
3
+ *
4
+ * Checks that go beyond Zod schema parsing:
5
+ * - Assertion types are in the curated set
6
+ * - Rubric template names resolve to known templates
7
+ * - Feature area strings are well-formed
8
+ * - Canonical doc slugs look reasonable (slugs, not URLs)
9
+ *
10
+ * Most checks produce warnings, not errors — the pipeline can still run
11
+ * with imperfect tasks. Only structural failures (caught by Zod) and duplicate task IDs (reported as errors here) block.
12
+ *
13
+ * @see packages/eval/src/adapters/task-sources/repo-schemas.ts
14
+ */
15
+ import { CURATED_ASSERTION_TYPES, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
16
+ // ---------------------------------------------------------------------------
17
+ // Public API
18
+ // ---------------------------------------------------------------------------
19
+ /**
20
+ * Run semantic validation on an array of parsed repo tasks.
21
+ *
22
+ * Returns warnings for issues that don't block execution (unknown feature
23
+ * areas, unresolved slugs) and errors for issues that would cause pipeline
24
+ * failures (completely missing required fields — though Zod catches most).
25
+ */
26
+ export function validateRepoTasks(tasks) {
27
+ const errors = [];
28
+ const warnings = [];
29
+ // Check for duplicate IDs
30
+ const seenIds = new Set();
31
+ for (const task of tasks) {
32
+ if (seenIds.has(task.id)) {
33
+ errors.push({
34
+ taskId: task.id,
35
+ field: "id",
36
+ message: `Duplicate task ID "${task.id}"`,
37
+ });
38
+ }
39
+ seenIds.add(task.id);
40
+ }
41
+ for (const task of tasks) {
42
+ // Check assertion types
43
+ for (let i = 0; i < task.assert.length; i++) {
44
+ const assertion = task.assert[i];
45
+ if (!CURATED_ASSERTION_TYPES.includes(assertion.type)) {
46
+ warnings.push({
47
+ taskId: task.id,
48
+ field: `assert[${i}].type`,
49
+ message: `Unknown assertion type "${assertion.type}". ` +
50
+ `Valid types: ${CURATED_ASSERTION_TYPES.join(", ")}`,
51
+ });
52
+ }
53
+ // Check rubric template for llm-rubric assertions
54
+ if (assertion.type === "llm-rubric" && "template" in assertion) {
55
+ const template = assertion.template;
56
+ if (!RUBRIC_TEMPLATE_NAMES.includes(template)) {
57
+ warnings.push({
58
+ taskId: task.id,
59
+ field: `assert[${i}].template`,
60
+ message: `Unknown rubric template "${template}". ` +
61
+ `Valid templates: ${RUBRIC_TEMPLATE_NAMES.join(", ")}`,
62
+ });
63
+ }
64
+ }
65
+ }
66
+ // Check canonical doc refs look reasonable
67
+ for (let i = 0; i < (task.canonicalDocs?.length ?? 0); i++) {
68
+ const doc = task.canonicalDocs[i];
69
+ // Slug refs: warn if they look like URLs or paths
70
+ if ("slug" in doc && !("id" in doc) && typeof doc.slug === "string") {
71
+ if (doc.slug.includes("/") || doc.slug.includes("http")) {
72
+ warnings.push({
73
+ taskId: task.id,
74
+ field: `canonicalDocs[${i}].slug`,
75
+ message: `Slug "${doc.slug}" looks like a URL or path — use 'path' type for paths or 'slug' for document slugs (e.g., "groq-introduction")`,
76
+ });
77
+ }
78
+ }
79
+ }
80
+ // Check task has at least one llm-rubric assertion (recommended but not required)
81
+ const hasLlmRubric = task.assert.some((a) => a.type === "llm-rubric");
82
+ if (!hasLlmRubric) {
83
+ warnings.push({
84
+ taskId: task.id,
85
+ field: "assert",
86
+ message: "No llm-rubric assertion found. Tasks should have at least one scored rubric for meaningful evaluation.",
87
+ });
88
+ }
89
+ // Check taskPrompt exists in vars (vars.task)
90
+ if (!task.vars?.task) {
91
+ warnings.push({
92
+ taskId: task.id,
93
+ field: "vars.task",
94
+ message: "No task prompt found in vars.task. The LLM will receive an empty implementation request.",
95
+ });
96
+ }
97
+ }
98
+ return {
99
+ valid: errors.length === 0,
100
+ errors,
101
+ warnings,
102
+ };
103
+ }
104
/**
 * Render a validation result as a newline-joined report for the console.
 *
 * Errors are listed first under an "Errors" heading, then warnings under a
 * "Warnings" heading, one `[taskId] field: message` line per finding. A
 * success line is emitted only when the result is valid and has no warnings.
 *
 * @param result - Result with `valid`, `errors`, and `warnings`
 * @returns The report text (empty string when there is nothing to print)
 */
export function formatValidationResult(result) {
    const renderEntry = (entry) => ` [${entry.taskId}] ${entry.field}: ${entry.message}`;
    const { errors, warnings, valid } = result;
    const output = [];
    if (errors.length > 0) {
        output.push("❌ Errors:", ...errors.map((entry) => renderEntry(entry)));
    }
    if (warnings.length > 0) {
        output.push("⚠️ Warnings:", ...warnings.map((entry) => renderEntry(entry)));
    }
    if (valid && warnings.length === 0) {
        output.push("✅ All repo tasks pass validation");
    }
    return output.join("\n");
}
126
+ // ---------------------------------------------------------------------------
127
+ // Snake_case detection (pre-parse helper)
128
+ // ---------------------------------------------------------------------------
129
/** Known snake_case → camelCase field mappings for common errors */
const SNAKE_TO_CAMEL = {
    feature_area: "featureArea",
    canonical_docs: "canonicalDocs",
    doc_coverage: "docCoverage",
    reference_solution: "referenceSolution",
};
/**
 * Find known snake_case field names in raw task YAML data.
 *
 * Meant to run BEFORE Zod parsing so authors get a friendly hint when they
 * use framework-internal snake_case names instead of the camelCase names
 * expected in repo task files. Anything that is not an array yields no
 * warnings, and non-object entries are skipped. Entries without a string
 * `id` are labelled by their index (`task[i]`).
 *
 * @param raw - Raw parsed YAML (before Zod validation)
 * @param filename - Source filename for error messages
 * @returns Array of warning messages (empty if no issues)
 */
export function detectSnakeCaseFields(raw, filename) {
    if (!Array.isArray(raw))
        return [];
    const knownRenames = Object.entries(SNAKE_TO_CAMEL);
    return raw.flatMap((entry, index) => {
        if (typeof entry !== "object" || entry === null)
            return [];
        const label = typeof entry.id === "string" ? entry.id : `task[${index}]`;
        return knownRenames
            .filter(([snake]) => snake in entry)
            .map(([snake, camel]) => `[${filename}] ${label}: Found "${snake}" — repo tasks use camelCase. Did you mean "${camel}"?`);
    });
}
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Adapter: Load task definitions from tasks/*.yaml files.
3
+ *
4
+ * This adapter reads the raw YAML task definitions (before Promptfoo
5
+ * expansion) and maps them to the canonical TaskDefinition type from
6
+ * @sanity/ailf-core. It handles area filtering (filename stem) and
7
+ * task ID filtering.
8
+ *
9
+ * Unlike loadAndExpandTasks() — which produces Promptfoo-specific
10
+ * ExpandedTestEntry objects — this adapter produces domain-level
11
+ * TaskDefinition objects suitable for the pipeline orchestrator.
12
+ */
13
+ import type { FilterOptions, TaskDefinition, TaskSource } from "../../_vendor/ailf-core/index.d.ts";
14
/**
 * Task source backed by the YAML files in a `tasks/` directory.
 */
export declare class YamlTaskSource implements TaskSource {
    /** Directory that contains the `tasks/` folder. */
    private readonly rootDir;
    /**
     * @param rootDir - Directory that contains the `tasks/` folder
     */
    constructor(rootDir: string);
    /**
     * Load task definitions, optionally narrowed by a filter.
     *
     * @param filter - Optional area and task-ID filter
     * @returns The matching task definitions
     */
    loadTasks(filter?: FilterOptions): Promise<Array<TaskDefinition>>;
}
@@ -0,0 +1,136 @@
1
+ /**
2
+ * Adapter: Load task definitions from tasks/*.yaml files.
3
+ *
4
+ * This adapter reads the raw YAML task definitions (before Promptfoo
5
+ * expansion) and maps them to the canonical TaskDefinition type from
6
+ * @sanity/ailf-core. It handles area filtering (filename stem) and
7
+ * task ID filtering.
8
+ *
9
+ * Unlike loadAndExpandTasks() — which produces Promptfoo-specific
10
+ * ExpandedTestEntry objects — this adapter produces domain-level
11
+ * TaskDefinition objects suitable for the pipeline orchestrator.
12
+ */
13
+ import { existsSync, readdirSync, readFileSync } from "fs";
14
+ import { resolve } from "path";
15
+ import { load } from "js-yaml";
16
+ // ---------------------------------------------------------------------------
17
+ // YamlTaskSource adapter
18
+ // ---------------------------------------------------------------------------
19
/**
 * Task source that reads raw task definitions from `<rootDir>/tasks/*.yaml`
 * (and `*.yml`) and maps them to domain-level task definitions.
 */
export class YamlTaskSource {
    rootDir;
    /**
     * @param rootDir - Directory that contains the `tasks/` folder
     */
    constructor(rootDir) {
        this.rootDir = rootDir;
    }
    /**
     * Load every task definition, optionally narrowed by area and task ID.
     *
     * Dotfiles are ignored and the remaining files are processed in sorted
     * order. The feature area of a task is the stem of the file it came from;
     * the area filter compares stems case-insensitively. Entries that do not
     * look like a task (see `isRawYamlTask`) are skipped silently.
     *
     * @param filter - Optional `{ areas, taskIds }`; empty lists mean "no filter"
     * @returns The mapped task definitions
     * @throws Error if the `tasks/` directory is missing or a file does not
     *   parse to an array
     */
    async loadTasks(filter) {
        const tasksDir = resolve(this.rootDir, "tasks");
        if (!existsSync(tasksDir)) {
            throw new Error(`tasks/ directory not found at ${tasksDir}`);
        }
        const stemOf = (name) => name.replace(/\.ya?ml$/, "");
        const hasAreaFilter = Boolean(filter?.areas && filter.areas.length > 0);
        const wantedAreas = hasAreaFilter
            ? new Set(filter.areas.map((area) => area.toLowerCase()))
            : null;
        const taskFiles = readdirSync(tasksDir)
            .filter((name) => !name.startsWith(".") && (name.endsWith(".yaml") || name.endsWith(".yml")))
            .sort()
            // Area filter — area name = filename stem (e.g., "groq" matches "groq.yaml")
            .filter((name) => wantedAreas === null || wantedAreas.has(stemOf(name).toLowerCase()));
        const definitions = [];
        for (const file of taskFiles) {
            const parsed = load(readFileSync(resolve(tasksDir, file), "utf-8"));
            if (!Array.isArray(parsed)) {
                throw new Error(`${file} did not parse to an array of tasks`);
            }
            const featureArea = stemOf(file);
            for (const entry of parsed) {
                if (!isRawYamlTask(entry)) {
                    continue;
                }
                const idFilterActive = Boolean(filter?.taskIds && filter.taskIds.length > 0);
                if (idFilterActive && !filter.taskIds.includes(entry.id)) {
                    continue;
                }
                definitions.push(mapToTaskDefinition(entry, featureArea));
            }
        }
        return definitions;
    }
}
64
+ // ---------------------------------------------------------------------------
65
+ // Mapping helpers
66
+ // ---------------------------------------------------------------------------
67
+ /**
68
+ * Map a raw YAML entry to a canonical TaskDefinition.
69
+ *
70
+ * Renames snake_case YAML keys to camelCase domain types and extracts
71
+ * the task prompt from `vars.task`. Additional vars beyond `task` and
72
+ * `docs` are collected into `extraVars`.
73
+ */
74
+ function mapToTaskDefinition(raw, featureArea) {
75
+ const { task, docs: _docs, ...rest } = (raw.vars ?? {});
76
+ const canonicalDocs = (raw.canonical_docs ?? [])
77
+ .map(mapCanonicalDoc)
78
+ .filter((d) => d !== null);
79
+ const extraVars = Object.keys(rest).length > 0 ? rest : undefined;
80
+ return {
81
+ id: raw.id,
82
+ description: raw.description,
83
+ featureArea,
84
+ taskPrompt: typeof task === "string" ? task : "",
85
+ canonicalDocs,
86
+ referenceSolution: raw.reference_solution ?? "",
87
+ docCoverage: raw.doc_coverage ?? false,
88
+ assertions: (raw.assert ?? []),
89
+ ...(raw.baseline ? { baseline: raw.baseline } : {}),
90
+ ...(extraVars ? { extraVars } : {}),
91
+ };
92
+ }
93
+ // ---------------------------------------------------------------------------
94
+ // Canonical doc mapping
95
+ // ---------------------------------------------------------------------------
96
+ /**
97
+ * Map a raw YAML canonical doc entry to the polymorphic CanonicalDocRef.
98
+ *
99
+ * Discriminates by key presence: slug, path, id, or perspective.
100
+ * Returns null (with a warning) if no valid resolution key is found.
101
+ */
102
+ function mapCanonicalDoc(raw) {
103
+ const reason = raw.reason ?? "";
104
+ // Resolution priority: id > slug > path > perspective.
105
+ // When `id` is present it's always an IdDocRef — slug and path are carried
106
+ // as optional annotations for human readability (they are NOT used for
107
+ // resolution by the pipeline).
108
+ if (raw.id) {
109
+ return {
110
+ id: raw.id,
111
+ reason,
112
+ ...(raw.slug ? { slug: raw.slug } : {}),
113
+ ...(raw.path ? { path: raw.path } : {}),
114
+ };
115
+ }
116
+ if (raw.slug)
117
+ return { slug: raw.slug, reason };
118
+ if (raw.path)
119
+ return { path: raw.path, reason };
120
+ if (raw.perspective)
121
+ return { perspective: raw.perspective, reason };
122
+ console.warn(" [warn] Skipping canonical doc entry with no resolution key (id, slug, path, or perspective)");
123
+ return null;
124
+ }
125
+ // ---------------------------------------------------------------------------
126
+ // Type guard
127
+ // ---------------------------------------------------------------------------
128
/**
 * Check whether a parsed YAML value looks like a single task definition:
 * a non-null object with a string `id` and a string `description`.
 */
function isRawYamlTask(entry) {
    if (typeof entry !== "object" || entry === null) {
        return false;
    }
    const hasStringId = "id" in entry && typeof entry.id === "string";
    const hasStringDescription = "description" in entry && typeof entry.description === "string";
    return hasStringId && hasStringDescription;
}
@@ -0,0 +1,132 @@
1
+ /**
2
+ * agentic-provider.ts
3
+ *
4
+ * An agentic Promptfoo provider that gives the model web_search and
5
+ * fetch_page tools, simulating how real AI agents behave when a user
6
+ * asks a development question.
7
+ *
8
+ * Supports two agent modes via the `agentMode` config:
9
+ *
10
+ * - "naive" — Simulates current agents (Claude Code, ChatGPT, Cursor):
11
+ * uses Jina Reader for search + page fetching because real
12
+ * agents can't render JavaScript-heavy SPAs server-side.
13
+ *
14
+ * - "optimized" — Simulates an ideal agent that knows about Sanity's
15
+ * agent-friendly endpoints: fetches .md versions of doc
16
+ * pages directly, uses llms.txt for doc discovery, and
17
+ * falls back to Jina only for non-Sanity pages.
18
+ *
19
+ * All HTTP requests go through the RequestRecorder, so they're automatically
20
+ * classified as docPageVisits, searchQueries, etc.
21
+ *
22
+ * Promptfoo config usage:
23
+ *
24
+ * providers:
25
+ * - id: file://dist/agent-observer/agentic-provider.js
26
+ * label: "GPT-4o (Naive Agent)"
27
+ * config:
28
+ * model: gpt-4o
29
+ * agentMode: naive # or "optimized"
30
+ * maxToolRounds: 5
31
+ */
32
+ import { RequestRecorder } from "./proxy.js";
33
/**
 * Context passed to `callApi` alongside the prompt.
 * NOTE(review): presumably a local subset of Promptfoo's own context type — confirm.
 */
interface CallApiContextParams {
    /** The prompt being evaluated: its raw text and an optional label. */
    prompt?: {
        raw: string;
        label?: string;
    };
    /** Test variables, keyed by name. */
    vars?: Record<string, string | object>;
}
40
/** Options accepted by the provider constructor. */
interface ProviderOptions {
    /** Free-form provider configuration (e.g. `model`, `agentMode`, `maxToolRounds`). */
    config?: Record<string, unknown>;
    /** Provider identifier. */
    id?: string;
}
44
/** Result returned from a provider call. Every field is optional. */
interface ProviderResponse {
    /** Whether the response came from a cache. */
    cached?: boolean;
    /** Cost of the call. NOTE(review): unit is not visible here — confirm. */
    cost?: number;
    /** Error message when the call failed. */
    error?: string;
    /** Additional data attached to the response. */
    metadata?: Record<string, unknown>;
    /** The model output, as text or a structured object. */
    output?: string | object;
    /** Token accounting for the call. */
    tokenUsage?: {
        total?: number;
        prompt?: number;
        completion?: number;
        cached?: number;
    };
}
57
/**
 * Agentic Promptfoo provider: gives the model web_search and fetch_page
 * tools and runs a tool-calling loop, in either "naive" or "optimized"
 * agent mode.
 */
export default class AgenticProvider {
    /** Provider configuration. */
    config: Record<string, unknown>;
    /** Identifier for this provider. NOTE(review): presumably what `id()` returns — confirm. */
    protected providerId: string;
    private agentMode;
    private allowedOrigins;
    private customHeaders;
    private docBaseUrl;
    private docsUrlPattern;
    private llmsTxtUrl;
    private priorityDomain;
    private recorder;
    private searchMode;
    /**
     * @param options - Provider options (`config` and optional `id`)
     */
    constructor(options: ProviderOptions);
    /**
     * Main Promptfoo entry point: runs the full agentic loop for one prompt.
     *
     * @param prompt - The prompt text
     * @param context - Optional prompt and vars context
     * @returns The provider response
     */
    callApi(prompt: string, context?: CallApiContextParams): Promise<ProviderResponse>;
    /**
     * Expose the request recorder to external integrations.
     */
    getRecorder(): RequestRecorder;
    /** Return the provider's identifier. */
    id(): string;
    /**
     * Build the system prompt from the agent mode and the configured doc URLs.
     */
    private buildSystemPrompt;
    /**
     * Clean Jina Reader markdown output: strips navigation boilerplate,
     * cookie banners, and footer content.
     */
    private cleanJinaContent;
    /**
     * Decide which LLM provider to use from the config and model name.
     * Reads the `provider` config field set by generate-configs, with
     * fallback heuristics for backward compatibility.
     */
    private detectProvider;
    private executeFetchPage;
    private executeListDocs;
    private executeTool;
    private executeWebSearch;
    /**
     * Build the set of tools offered to the agent from the search mode and
     * agent mode. With searchMode "off", web_search is left out entirely —
     * the model can't call what it can't see.
     */
    private getAvailableTools;
    /**
     * Merge custom headers into a request's headers.
     * Custom headers go into doc-site requests only — never into external
     * APIs (OpenAI, Jina, Google).
     */
    private mergeDocHeaders;
    /**
     * Parse search results out of Jina Reader markdown output.
     */
    private parseSearchResults;
    /**
     * Run the agentic tool-calling loop, routing to OpenAI or Anthropic
     * according to the `provider` config field.
     */
    private runAgenticLoop;
    private runAnthropicLoop;
    private runOpenAILoop;
    /**
     * Strip HTML tags and normalize whitespace. Fallback for when neither
     * .md endpoints nor Jina are available.
     */
    private stripHtml;
    /**
     * Convert our ToolDefinition[] to Anthropic's tool format, which uses
     * `input_schema` instead of `parameters`.
     */
    private toAnthropicTools;
}
132
+ export {};