@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530) hide show
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,137 @@
1
+ /**
2
+ * types.ts
3
+ *
4
+ * Data structures for capturing AI agent behavior during evaluation.
5
+ *
6
+ * When an AI agent runs a task (e.g., "implement a Sanity studio schema"),
7
+ * it may browse documentation, search for APIs, download code samples, etc.
8
+ * These types describe the observed network activity so we can answer:
9
+ *
10
+ * - What URLs did the agent visit?
11
+ * - What content did it actually download?
12
+ * - Did it search sanity.io/docs? What queries?
13
+ * - How long did it spend fetching vs generating?
14
+ * - Did it find the *right* documentation pages?
15
+ */
16
+ export interface AgentBehaviorLog {
17
+ /** Sanity API calls (non-docs) */
18
+ apiCalls: ApiCall[];
19
+ /** Sanity doc pages the agent visited */
20
+ docPageVisits: DocPageVisit[];
21
+ /** ISO 8601 timestamp when observation ended */
22
+ endedAt: string;
23
+ /** Requests to non-Sanity domains */
24
+ externalRequests: ExternalRequest[];
25
+ /** Time spent waiting on network requests, in ms */
26
+ networkDurationMs: number;
27
+ /** Provider that was observed (e.g., "openai:gpt-4o") */
28
+ provider: string;
29
+ /** Every HTTP request/response observed, in order */
30
+ requests: ObservedRequest[];
31
+ /** Search queries the agent performed */
32
+ searchQueries: SearchQuery[];
33
+ /** Unique identifier for this observation session */
34
+ sessionId: string;
35
+ /** ISO 8601 timestamp when observation started */
36
+ startedAt: string;
37
+ summary: AgentBehaviorSummary;
38
+ /** Task description from the test case */
39
+ taskDescription: string;
40
+ /** Total wall-clock time for the test, in ms */
41
+ totalDurationMs: number;
42
+ }
43
+ /** Roll-up stats for quick analysis */
44
+ export interface AgentBehaviorSummary {
45
+ /** Number of Sanity API calls */
46
+ apiCallCount: number;
47
+ /** Number of sanity.io doc pages visited */
48
+ docPagesVisited: number;
49
+ /** List of unique sanity.io doc slugs visited */
50
+ docSlugsVisited: string[];
51
+ /** List of unique external domains contacted */
52
+ externalDomains: string[];
53
+ /** Number of external (non-Sanity) requests */
54
+ externalRequestCount: number;
55
+ /** Number of search queries performed */
56
+ searchesPerformed: number;
57
+ /** Total bytes downloaded */
58
+ totalBytesDownloaded: number;
59
+ /** Total network time in ms */
60
+ totalNetworkMs: number;
61
+ /** Total number of HTTP requests observed */
62
+ totalRequests: number;
63
+ /** List of unique search queries */
64
+ uniqueSearchQueries: string[];
65
+ /** Number of unique URLs visited */
66
+ uniqueUrls: number;
67
+ /** Whether the agent visited any sanity.io docs at all */
68
+ usedDocs: boolean;
69
+ /** Whether the agent performed any searches */
70
+ usedSearch: boolean;
71
+ }
72
+ /** An API call to Sanity's API (not docs) */
73
+ export interface ApiCall {
74
+ /** API endpoint path */
75
+ endpoint: string;
76
+ /** HTTP method */
77
+ method: string;
78
+ /** Timestamp */
79
+ timestamp: string;
80
+ /** Full URL */
81
+ url: string;
82
+ }
83
+ /** A page the agent visited on sanity.io/docs */
84
+ export interface DocPageVisit {
85
+ /** Response size in bytes */
86
+ contentSize: number;
87
+ /** Slug extracted from the URL, e.g., "groq-introduction" */
88
+ slug: string;
89
+ /** Timestamp of the visit */
90
+ timestamp: string;
91
+ /** Page title if extractable from response */
92
+ title?: string;
93
+ url: string;
94
+ }
95
+ /** A request to a non-Sanity URL */
96
+ export interface ExternalRequest {
97
+ /** Domain extracted from URL */
98
+ domain: string;
99
+ method: string;
100
+ timestamp: string;
101
+ url: string;
102
+ }
103
+ export interface ObservedRequest {
104
+ /** Request body (for POST searches, etc.), truncated to maxBodyBytes */
105
+ body?: string;
106
+ /** Content-Type of the response */
107
+ contentType?: string;
108
+ /** Relevant request headers (e.g., Accept, User-Agent) */
109
+ headers: Record<string, string>;
110
+ /** Time from request start to response complete, in ms */
111
+ latencyMs: number;
112
+ /** HTTP method */
113
+ method: string;
114
+ /** Response body preview (first N chars), useful for seeing what the agent actually read */
115
+ responsePreview?: string;
116
+ /** Response body size in bytes */
117
+ responseSize: number;
118
+ /** Monotonic sequence number within the test run */
119
+ seq: number;
120
+ /** HTTP status code of the response */
121
+ statusCode: number;
122
+ /** ISO 8601 timestamp when the request was initiated */
123
+ timestamp: string;
124
+ /** Full URL requested */
125
+ url: string;
126
+ }
127
+ /** A search query the agent performed */
128
+ export interface SearchQuery {
129
+ /** Extracted query string */
130
+ query: string;
131
+ /** Number of results returned (if detectable) */
132
+ resultCount?: number;
133
+ /** Timestamp */
134
+ timestamp: string;
135
+ /** The search endpoint URL */
136
+ url: string;
137
+ }
@@ -0,0 +1,16 @@
1
+ /**
2
+ * types.ts
3
+ *
4
+ * Data structures for capturing AI agent behavior during evaluation.
5
+ *
6
+ * When an AI agent runs a task (e.g., "implement a Sanity studio schema"),
7
+ * it may browse documentation, search for APIs, download code samples, etc.
8
+ * These types describe the observed network activity so we can answer:
9
+ *
10
+ * - What URLs did the agent visit?
11
+ * - What content did it actually download?
12
+ * - Did it search sanity.io/docs? What queries?
13
+ * - How long did it spend fetching vs generating?
14
+ * - Did it find the *right* documentation pages?
15
+ */
16
+ export {};
@@ -0,0 +1,72 @@
1
+ /**
2
+ * source-isolation.ts
3
+ *
4
+ * Promptfoo custom assertion: verifies that the agentic provider only
5
+ * fetched documentation from the configured allowed origins.
6
+ *
7
+ * Compiled to dist/assertions/source-isolation.js and referenced via
8
+ * file:// in generated Promptfoo configs.
9
+ *
10
+ * The assertion has weight: 0 — it doesn't affect scores. It surfaces
11
+ * as an advisory pass/fail signal in results.
12
+ *
13
+ * @see docs/exec-plans/completed/source-aware-eval-isolation.md (Phase 3b)
14
+ */
15
+ export interface SourceIsolationReport {
16
+ blocked: number;
17
+ isolationScore: number;
18
+ offOrigin: number;
19
+ offOriginUrls: string[];
20
+ onOrigin: number;
21
+ originBreakdown: Record<string, number>;
22
+ total: number;
23
+ }
24
+ interface AgentBehaviorData {
25
+ docPageVisits?: {
26
+ slug: string;
27
+ url: string;
28
+ }[];
29
+ requests?: {
30
+ statusCode: number;
31
+ url: string;
32
+ }[];
33
+ }
34
+ interface AssertionContext {
35
+ providerResponse?: {
36
+ metadata?: {
37
+ agentBehavior?: AgentBehaviorData;
38
+ };
39
+ };
40
+ vars?: Record<string, string>;
41
+ }
42
+ interface GradingResult {
43
+ pass: boolean;
44
+ reason: string;
45
+ score: number;
46
+ }
47
+ /**
48
+ * Analyze doc page visits and compute an isolation score.
49
+ *
50
+ * The isolation score is the fraction of doc-fetching requests that went
51
+ * to an allowed origin. A score of 1.0 means perfect isolation.
52
+ *
53
+ * @param docPageVisits - Classified doc page visit records
54
+ * @param allowedOrigins - Origin patterns (glob-capable)
55
+ * @returns Isolation report with score and breakdown
56
+ */
57
+ export declare function analyzeSourceIsolation(docPageVisits: {
58
+ url: string;
59
+ }[], allowedOrigins: string[]): SourceIsolationReport;
60
+ /**
61
+ * Promptfoo custom assertion function.
62
+ *
63
+ * Called by Promptfoo for each test case when referenced as:
64
+ * type: javascript
65
+ * value: file://dist/assertions/source-isolation.js
66
+ * weight: 0
67
+ *
68
+ * @param output - The model's text output (unused)
69
+ * @param context - Promptfoo assertion context with provider metadata
70
+ */
71
+ export default function (_output: string, context: AssertionContext): GradingResult;
72
+ export {};
@@ -0,0 +1,117 @@
1
+ /**
2
+ * source-isolation.ts
3
+ *
4
+ * Promptfoo custom assertion: verifies that the agentic provider only
5
+ * fetched documentation from the configured allowed origins.
6
+ *
7
+ * Compiled to dist/assertions/source-isolation.js and referenced via
8
+ * file:// in generated Promptfoo configs.
9
+ *
10
+ * The assertion has weight: 0 — it doesn't affect scores. It surfaces
11
+ * as an advisory pass/fail signal in results.
12
+ *
13
+ * @see docs/exec-plans/completed/source-aware-eval-isolation.md (Phase 3b)
14
+ */
15
+ import { isAllowedOrigin } from "../sources.js";
16
/**
 * Analyze doc page visits and compute an isolation score.
 *
 * The isolation score is the fraction of doc page visits whose URL matched
 * one of the allowed origins. A score of 1.0 means perfect isolation.
 *
 * When either `allowedOrigins` or `docPageVisits` is empty there is nothing
 * to check, so every visit is reported as on-origin with a score of 1.0 and
 * an empty origin breakdown.
 *
 * @param docPageVisits - Doc page visit records; only `url` is read
 * @param allowedOrigins - Origin patterns passed through to `isAllowedOrigin`
 * @returns Isolation report with score, counts, off-origin URLs and a
 *          per-hostname visit count (`www.` prefix stripped)
 */
export function analyzeSourceIsolation(docPageVisits, allowedOrigins) {
    if (allowedOrigins.length === 0 || docPageVisits.length === 0) {
        return {
            blocked: 0,
            isolationScore: 1.0,
            offOrigin: 0,
            offOriginUrls: [],
            onOrigin: docPageVisits.length,
            originBreakdown: {},
            total: docPageVisits.length,
        };
    }
    // Tally per hostname in a Map. A plain object literal inherits members
    // such as "constructor" from Object.prototype, so a visit to a host with
    // that name would read the inherited value instead of a missing count.
    const hostnameCounts = new Map();
    const offOriginUrls = [];
    let onOrigin = 0;
    let offOrigin = 0;
    for (const visit of docPageVisits) {
        try {
            const hostname = new URL(visit.url).hostname.replace(/^www\./, "");
            hostnameCounts.set(hostname, (hostnameCounts.get(hostname) ?? 0) + 1);
            if (isAllowedOrigin(visit.url, allowedOrigins)) {
                onOrigin++;
            }
            else {
                offOrigin++;
                offOriginUrls.push(visit.url);
            }
        }
        catch {
            // Unparseable URLs (or a failing origin check) count as off-origin.
            offOrigin++;
            offOriginUrls.push(visit.url);
        }
    }
    const total = onOrigin + offOrigin;
    const isolationScore = total > 0 ? onOrigin / total : 1.0;
    return {
        blocked: 0, // Blocked requests aren't in docPageVisits — they're caught earlier
        isolationScore,
        offOrigin,
        offOriginUrls,
        onOrigin,
        // fromEntries defines own data properties, so any hostname is a safe key.
        originBreakdown: Object.fromEntries(hostnameCounts),
        total,
    };
}
71
+ // ---------------------------------------------------------------------------
72
+ // Promptfoo assertion entry point
73
+ // ---------------------------------------------------------------------------
74
/**
 * Promptfoo custom assertion entry point.
 *
 * Referenced from generated Promptfoo configs as:
 *   type: javascript
 *   value: file://dist/assertions/source-isolation.js
 *   weight: 0
 *
 * Passes when there is nothing to check (no recorded behavior, no doc
 * visits, or no allowed origins configured); otherwise passes only when
 * every recorded doc page visit was on an allowed origin.
 *
 * @param _output - The model's text output (unused)
 * @param context - Promptfoo assertion context with provider metadata
 * @returns Grading result with `pass`, `reason` and `score`
 */
export default function (_output, context) {
    const agentBehavior = context.providerResponse?.metadata?.agentBehavior;
    if (!agentBehavior) {
        return { pass: true, reason: "No agent behavior recorded", score: 1 };
    }
    const visits = agentBehavior.docPageVisits ?? [];
    if (visits.length === 0) {
        return { pass: true, reason: "No doc page visits recorded", score: 1 };
    }
    // Allowed origins arrive as a comma-separated env var (set by pipeline.ts).
    const allowedOrigins = [];
    for (const piece of (process.env.DOC_ALLOWED_ORIGINS || "").split(",")) {
        const origin = piece.trim();
        if (origin) {
            allowedOrigins.push(origin);
        }
    }
    if (allowedOrigins.length === 0) {
        return {
            pass: true,
            reason: `No origin sandboxing configured (${visits.length} doc visits)`,
            score: 1,
        };
    }
    const report = analyzeSourceIsolation(visits, allowedOrigins);
    const isolated = report.offOrigin === 0;
    let reason;
    if (isolated) {
        reason = `All doc fetches on-origin (${report.onOrigin} visits, origins: ${allowedOrigins.join(", ")})`;
    }
    else {
        reason = `${report.offOrigin} off-origin doc fetch(es): ${report.offOriginUrls.join(", ")}`;
    }
    return { pass: isolated, reason, score: report.isolationScore };
}
package/dist/cli.d.ts ADDED
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * cli.ts — AILF CLI entry point.
4
+ *
5
+ * Unified command-line interface for the AI Literacy Framework.
6
+ * All evaluation commands are exposed as subcommands under `ailf`.
7
+ *
8
+ * Usage:
9
+ * ailf pipeline [flags] # full evaluation pipeline
10
+ * ailf compare [flags] # compare evaluation runs
11
+ * ailf baseline <cmd> [flags] # baseline management
12
+ * ailf validate [flags] # config validation
13
+ * ailf completion bash # generate shell completions
14
+ * ailf --help # list all commands
15
+ *
16
+ * Global options:
17
+ * --verbose / -v # increase log output
18
+ * --quiet / -q # suppress non-error output
19
+ * --dotenv <path> # override default .env path
20
+ *
21
+ * Dev mode (without building):
22
+ * tsx src/cli.ts pipeline --debug
23
+ */
24
+ export {};
package/dist/cli.js ADDED
@@ -0,0 +1,199 @@
1
+ #!/usr/bin/env node
2
+ /* oxlint-disable import/first -- imports are intentionally interleaved with
3
+ command registration for readability and lazy loading */
4
+ /**
5
+ * cli.ts — AILF CLI entry point.
6
+ *
7
+ * Unified command-line interface for the AI Literacy Framework.
8
+ * All evaluation commands are exposed as subcommands under `ailf`.
9
+ *
10
+ * Usage:
11
+ * ailf pipeline [flags] # full evaluation pipeline
12
+ * ailf compare [flags] # compare evaluation runs
13
+ * ailf baseline <cmd> [flags] # baseline management
14
+ * ailf validate [flags] # config validation
15
+ * ailf completion bash # generate shell completions
16
+ * ailf --help # list all commands
17
+ *
18
+ * Global options:
19
+ * --verbose / -v # increase log output
20
+ * --quiet / -q # suppress non-error output
21
+ * --dotenv <path> # override default .env path
22
+ *
23
+ * Dev mode (without building):
24
+ * tsx src/cli.ts pipeline --debug
25
+ */
26
+ import { config as dotenvConfig } from "dotenv";
27
+ import { existsSync, readFileSync } from "fs";
28
+ import { dirname, resolve } from "path";
29
+ import { fileURLToPath } from "url";
30
+ const __dirname = dirname(fileURLToPath(import.meta.url));
31
+ const ROOT = resolve(__dirname, "..");
32
+ /** Path to the eval package root (packages/eval). Used by --explain. */
33
+ const EVAL_ROOT = ROOT;
34
+ // ---------------------------------------------------------------------------
35
+ // Load .env — must happen before Commander parses so that .env()
36
+ // fallbacks resolve correctly.
37
+ //
38
+ // Resolution order:
39
+ // 1. Explicit --dotenv <path> flag
40
+ // 2. Monorepo root .env (../../.env relative to packages/eval/)
41
+ // 3. Caller's working directory .env
42
+ //
43
+ // This allows the CLI to work both in the monorepo (dev) and when
44
+ // installed globally via npm (production).
45
+ // ---------------------------------------------------------------------------
46
/**
 * Decide which .env file to load before Commander parses the arguments.
 *
 * Resolution order:
 *   1. Explicit `--dotenv <path>` or `--dotenv=<path>` on the command line
 *   2. Monorepo root .env (two levels above ROOT), if it exists
 *   3. `.env` in AILF_CALLER_CWD, falling back to the current working directory
 *
 * @returns Absolute path of the .env file to try (it may not exist)
 */
function resolveEnvPath() {
    const inlinePrefix = "--dotenv=";
    for (let i = 0; i < process.argv.length; i++) {
        const arg = process.argv[i];
        if (arg === "--dotenv") {
            // Space-separated form; a trailing flag with no value falls through.
            if (process.argv[i + 1]) {
                return resolve(process.argv[i + 1]);
            }
            break;
        }
        if (arg.startsWith(inlinePrefix)) {
            // Commander also accepts --dotenv=<path>, so the pre-scan must too.
            const inlineValue = arg.slice(inlinePrefix.length);
            if (inlineValue) {
                return resolve(inlineValue);
            }
            break;
        }
    }
    // Monorepo root .env (dev mode)
    const monorepoEnv = resolve(ROOT, "..", "..", ".env");
    if (existsSync(monorepoEnv)) {
        return monorepoEnv;
    }
    // Caller's working directory .env (npm install mode)
    const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
    return resolve(callerCwd, ".env");
}
59
+ const envPath = resolveEnvPath();
60
+ if (existsSync(envPath)) {
61
+ dotenvConfig({ override: true, path: envPath });
62
+ }
63
+ // ---------------------------------------------------------------------------
64
+ // Pre-scan for --verbose / --quiet to make them available globally before
65
+ // Commander parses. Commands can check process.env.AILF_LOG_LEVEL.
66
+ // ---------------------------------------------------------------------------
67
+ if (process.argv.includes("--verbose") || process.argv.includes("-v")) {
68
+ process.env.AILF_LOG_LEVEL = "verbose";
69
+ }
70
+ else if (process.argv.includes("--quiet") || process.argv.includes("-q")) {
71
+ process.env.AILF_LOG_LEVEL = "quiet";
72
+ }
73
+ // ---------------------------------------------------------------------------
74
+ // Build CLI program
75
+ // ---------------------------------------------------------------------------
76
+ import { Command } from "commander";
77
+ // Read version from package.json
78
+ const pkgPath = resolve(ROOT, "package.json");
79
+ const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
80
+ const program = new Command()
81
+ .name("ailf")
82
+ .description("AI Literacy Framework — evaluate how well docs enable AI coding tools")
83
+ .version(pkg.version)
84
+ .option("-v, --verbose", "Increase log output")
85
+ .option("-q, --quiet", "Suppress non-error output")
86
+ .option("--dotenv <path>", "Override default .env file path")
87
+ .option("--explain", "Show execution plan without running")
88
+ .option("--format <fmt>", "Output format for --explain (console, json)", "console")
89
+ .option("-y, --yes", "With --explain: show plan then prompt to confirm execution");
90
+ // ---------------------------------------------------------------------------
91
+ // Global --explain hook — intercepts any command before execution
92
+ // ---------------------------------------------------------------------------
93
+ program.hook("preAction", async (thisCommand, actionCommand) => {
94
+ const globalOpts = thisCommand.opts();
95
+ if (!globalOpts.explain)
96
+ return;
97
+ const { handleExplain } = await import("./commands/explain-handler.js");
98
+ try {
99
+ await handleExplain(actionCommand, globalOpts.yes ?? false, EVAL_ROOT);
100
+ process.exit(0);
101
+ }
102
+ catch (err) {
103
+ // Sentinel from --yes confirmation: user wants to proceed
104
+ if (err !== null &&
105
+ typeof err === "object" &&
106
+ "__proceedArgv" in err) {
107
+ const filteredArgv = err.__proceedArgv;
108
+ console.log("\n ▸ Proceeding with execution…\n");
109
+ await program.parseAsync(filteredArgv);
110
+ return;
111
+ }
112
+ throw err;
113
+ }
114
+ });
115
+ // ---------------------------------------------------------------------------
116
+ // Register commands
117
+ // ---------------------------------------------------------------------------
118
+ // Pipeline — the main orchestrator
119
+ import { createPipelineCommand } from "./commands/pipeline.js";
120
+ program.addCommand(createPipelineCommand());
121
+ // Compare — structured score comparison
122
+ import { createCompareCommand } from "./commands/compare.js";
123
+ program.addCommand(createCompareCommand());
124
+ // Baseline — save/compare/history
125
+ import { createBaselineCommand } from "./commands/baseline.js";
126
+ program.addCommand(createBaselineCommand());
127
+ // Validate — config validation
128
+ import { createValidateCommand } from "./commands/validate.js";
129
+ program.addCommand(createValidateCommand());
130
+ // Coverage audit — feature coverage analysis
131
+ import { createCoverageAuditCommand } from "./commands/coverage-audit.js";
132
+ program.addCommand(createCoverageAuditCommand());
133
+ // Weekly digest — trend digest delivery
134
+ import { createWeeklyDigestCommand } from "./commands/weekly-digest.js";
135
+ program.addCommand(createWeeklyDigestCommand());
136
+ // Readiness report — launch readiness checklist
137
+ import { createReadinessReportCommand } from "./commands/readiness-report.js";
138
+ program.addCommand(createReadinessReportCommand());
139
+ // Discovery report — agent discoverability analysis
140
+ import { createDiscoveryReportCommand } from "./commands/discovery-report.js";
141
+ program.addCommand(createDiscoveryReportCommand());
142
+ // Grader — reliability tools (consistency, compare, sensitivity, validate)
143
+ import { createGraderCommand } from "./commands/grader/index.js";
144
+ program.addCommand(createGraderCommand());
145
+ // Fetch docs — pull documentation from Sanity CMS
146
+ import { createFetchDocsCommand } from "./commands/fetch-docs.js";
147
+ program.addCommand(createFetchDocsCommand());
148
+ // Generate configs — generate promptfoo config files
149
+ import { createGenerateConfigsCommand } from "./commands/generate-configs.js";
150
+ program.addCommand(createGenerateConfigsCommand());
151
+ // Calculate scores — compute AI Literacy Scores from eval results
152
+ import { createCalculateScoresCommand } from "./commands/calculate-scores.js";
153
+ program.addCommand(createCalculateScoresCommand());
154
+ // Eval — direct promptfoo eval passthrough
155
+ import { createEvalCommand } from "./commands/eval.js";
156
+ program.addCommand(createEvalCommand());
157
+ // PR comment — generate markdown PR comment
158
+ import { createPrCommentCommand } from "./commands/pr-comment.js";
159
+ program.addCommand(createPrCommentCommand());
160
+ // Publish — standalone report publishing to Sanity Content Lake
161
+ import { createPublishCommand } from "./commands/publish.js";
162
+ program.addCommand(createPublishCommand());
163
+ // Agent report — agent behavior observation report
164
+ import { createAgentReportCommand } from "./commands/agent-report.js";
165
+ program.addCommand(createAgentReportCommand());
166
+ // Cache — local pipeline cache management
167
+ import { createCacheCommand } from "./commands/cache.js";
168
+ program.addCommand(createCacheCommand());
169
+ // Webhook server — local development server
170
+ import { createWebhookServerCommand } from "./commands/webhook-server.js";
171
+ program.addCommand(createWebhookServerCommand());
172
+ // Lookup doc — search Sanity for documentation articles
173
+ import { createLookupDocCommand } from "./commands/lookup-doc.js";
174
+ program.addCommand(createLookupDocCommand());
175
+ // Measure retrieval — retrieval quality measurement
176
+ import { createMeasureRetrievalCommand } from "./commands/measure-retrieval.js";
177
+ program.addCommand(createMeasureRetrievalCommand());
178
+ // Init — initialize a directory for AILF
179
+ import { createInitCommand } from "./commands/init.js";
180
+ program.addCommand(createInitCommand());
181
+ // Validate tasks — standalone repo task validation
182
+ import { createValidateTasksCommand } from "./commands/validate-tasks.js";
183
+ program.addCommand(createValidateTasksCommand());
184
+ // Interactive — guided wizard
185
+ import { createInteractiveCommand } from "./commands/interactive.js";
186
+ program.addCommand(createInteractiveCommand());
187
+ // Shell completion — must be registered last (needs full program tree)
188
+ import { createCompletionCommand } from "./commands/completion.js";
189
+ program.addCommand(createCompletionCommand(program));
190
+ // ---------------------------------------------------------------------------
191
+ // Parse and run — default to interactive mode when no arguments given
192
+ // ---------------------------------------------------------------------------
193
+ // If no command is specified (just `ailf`), launch interactive mode
194
+ if (process.argv.length <= 2) {
195
+ await program.parseAsync([...process.argv, "interactive"]);
196
+ }
197
+ else {
198
+ await program.parseAsync();
199
+ }
@@ -0,0 +1,5 @@
1
+ /**
2
+ * agent-report command — generate an agent behavior observation report.
3
+ */
4
+ import { Command } from "commander";
5
+ export declare function createAgentReportCommand(): Command;
@@ -0,0 +1,69 @@
1
+ /**
2
+ * agent-report command — generate an agent behavior observation report.
3
+ */
4
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
5
+ import { dirname, join } from "path";
6
+ import { Command } from "commander";
7
+ import { analyzeResults } from "../pipeline/agent-behavior-report.js";
8
/**
 * Build the `agent-report` subcommand.
 *
 * The command reads an eval results JSON file, runs `analyzeResults` over
 * its result entries, and writes a JSON summary to
 * `results/latest/agent-behavior-report.json` under the package root.
 * Failures set `process.exitCode = 1` and print a message instead of
 * throwing.
 *
 * @returns The configured Commander command
 */
export function createAgentReportCommand() {
    return new Command("agent-report")
        .description("Generate an agent behavior observation report from eval results")
        .argument("[results-path]", "Path to eval-results.json (default: results/latest/eval-results.json)")
        .action(async (resultsPath) => {
        try {
            // URL#pathname is not a filesystem path: it stays percent-encoded
            // and carries a leading slash before Windows drive letters.
            const { fileURLToPath } = await import("url");
            const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..");
            const resolvedPath = resultsPath ?? join(ROOT, "results", "latest", "eval-results.json");
            if (!existsSync(resolvedPath)) {
                console.error(`Results file not found: ${resolvedPath}`);
                console.error("Run an evaluation first: pnpm eval:observed");
                process.exitCode = 1;
                return;
            }
            console.log(`Reading results from: ${resolvedPath}`);
            console.log();
            const json = JSON.parse(readFileSync(resolvedPath, "utf-8"));
            // Results may be a top-level array or nested one level down.
            const rawResults = Array.isArray(json?.results)
                ? json.results
                : json?.results?.results;
            if (!Array.isArray(rawResults)) {
                throw new Error(`Unexpected results format in ${resolvedPath}: no "results" array found`);
            }
            const analysis = analyzeResults(rawResults);
            if (!analysis.hasData) {
                console.log("No agent behavior data found in the results.");
                console.log("Make sure you ran the evaluation with the observed config:");
                console.log("  pnpm eval:observed");
                return;
            }
            // Write JSON report
            const outDir = join(ROOT, "results", "latest");
            mkdirSync(outDir, { recursive: true });
            const reportData = {
                features: analysis.features.map((f) => ({
                    avgDocPages: f.avgDocPages,
                    avgNetworkMs: f.avgNetworkMs,
                    avgSearches: f.avgSearches,
                    canonicalCoverage: f.canonicalCoverage,
                    canonicalSlugs: f.canonicalSlugs,
                    docSlugsVisited: f.allDocSlugs,
                    externalDomains: f.allExternalDomains,
                    feature: f.feature,
                    searchQueries: f.allSearchQueries,
                    taskCount: f.tasks.length,
                })),
                tasks: analysis.tasks.map((t) => ({
                    behavior: t.behavior,
                    description: t.description,
                    feature: t.feature,
                    hasDocs: t.hasDocs,
                })),
                timestamp: new Date().toISOString(),
                totalTasks: analysis.tasks.length,
            };
            writeFileSync(join(outDir, "agent-behavior-report.json"), JSON.stringify(reportData, null, 2));
            console.log("Agent behavior report written to results/latest/agent-behavior-report.json");
        }
        catch (err) {
            process.exitCode = 1;
            // Always print something, even when a non-Error value was thrown.
            console.error(err instanceof Error ? err.message : String(err));
        }
    });
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * baseline command — manage historical baseline snapshots of evaluation scores.
3
+ *
4
+ * Wraps the core baseline functions from pipeline/baseline.ts behind a
5
+ * Commander subcommand interface: `baseline save`, `baseline compare`,
6
+ * `baseline history`.
7
+ */
8
+ import { Command } from "commander";
9
+ export declare function createBaselineCommand(): Command;