@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530)
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,188 @@
1
+ /**
2
+ * job-store.ts
3
+ *
4
+ * Job tracking store backed by the Sanity Content Lake.
5
+ * Provides CRUD operations for `ailf.job` documents that track
6
+ * API-triggered pipeline evaluations.
7
+ *
8
+ * Job documents are created by the API gateway and updated by the
9
+ * CLI as the pipeline progresses through steps.
10
+ *
11
+ * @see docs/design-docs/api-service-gateway.md
12
+ * @see packages/studio/src/schema/job.ts — Sanity document schema
13
+ */
14
+ import { getSanityClient } from "./sanity/client.js";
15
+ // ---------------------------------------------------------------------------
16
+ // Constants
17
+ // ---------------------------------------------------------------------------
18
+ const JOB_TYPE = "ailf.job";
19
+ /** Default job deadline: 45 minutes from creation */
20
+ const DEFAULT_DEADLINE_MS = 45 * 60 * 1000;
21
+ // ---------------------------------------------------------------------------
22
+ // Job Store
23
+ // ---------------------------------------------------------------------------
24
/**
 * Store for `ailf.job` documents, backed by a Sanity client.
 *
 * Every method is best-effort: failures are logged with `console.warn`
 * and reported through a fallback return value (`null`, `0`, `[]` or
 * `false`) instead of being thrown.
 */
export class JobStore {
    client;
    /**
     * @param {object} [options]
     * @param {object} [options.client] - Ready-made client; used as-is when provided.
     * @param {string} [options.dataset] - Forwarded to getSanityClient() when set.
     * @param {string} [options.projectId] - Forwarded to getSanityClient() when set.
     * @param {string} [options.token] - Forwarded to getSanityClient() when set.
     */
    constructor(options = {}) {
        if (options.client) {
            this.client = options.client;
        }
        else {
            // Only pass overrides that were actually supplied.
            this.client = getSanityClient({
                ...(options.dataset ? { dataset: options.dataset } : {}),
                ...(options.projectId ? { projectId: options.projectId } : {}),
                ...(options.token ? { token: options.token } : {}),
            });
        }
    }
    /** Render a caught value as a log-friendly message. */
    static #describe(error) {
        return error instanceof Error ? error.message : String(error);
    }
    /**
     * Create a new job document in the Content Lake.
     *
     * Generates a job ID (e.g. "job_a1b2c3d4e5f6") and writes the
     * initial document with status "received". The job ID is used
     * directly as the Sanity document `_id`.
     *
     * @param {object} options
     * @param {string} options.apiKeyPrefix
     * @param {*} options.config
     * @param {*} [options.callback] - Stored as null when omitted.
     * @param {number} [options.deadlineMs] - Defaults to DEFAULT_DEADLINE_MS.
     * @returns The generated job ID, or null on failure
     */
    async createJob(options) {
        const jobId = generateJobId();
        // Read the clock once so deadlineAt is exactly createdAt + deadlineMs.
        const createdMs = Date.now();
        const deadlineMs = options.deadlineMs ?? DEFAULT_DEADLINE_MS;
        const now = new Date(createdMs).toISOString();
        const deadlineAt = new Date(createdMs + deadlineMs).toISOString();
        try {
            await this.client.create({
                _id: jobId,
                _type: JOB_TYPE,
                apiKeyPrefix: options.apiKeyPrefix,
                callback: options.callback ?? null,
                completedAt: null,
                config: options.config,
                createdAt: now,
                deadlineAt,
                error: null,
                execution: null,
                jobId,
                progress: null,
                reportId: null,
                startedAt: null,
                status: "received",
            });
            return jobId;
        }
        catch (error) {
            console.warn(` ⚠️ Failed to create job: ${JobStore.#describe(error)}`);
            return null;
        }
    }
    /**
     * Count jobs created today (since 00:00 UTC) for a given API key prefix.
     * Used for daily eval budget enforcement.
     *
     * @returns The count, or 0 when the query fails. Returning 0 means the
     *   caller's budget check fails open, so the failure is logged.
     */
    async countTodayJobs(apiKeyPrefix) {
        try {
            const todayStart = new Date();
            todayStart.setUTCHours(0, 0, 0, 0);
            const count = await this.client.fetch(`count(*[_type == $type && apiKeyPrefix == $prefix && createdAt >= $start])`, {
                prefix: apiKeyPrefix,
                start: todayStart.toISOString(),
                type: JOB_TYPE,
            });
            return count;
        }
        catch (error) {
            console.warn(` ⚠️ Failed to count today's jobs: ${JobStore.#describe(error)}`);
            return 0;
        }
    }
    /**
     * Count active (non-terminal) jobs for a given API key prefix.
     * Used for concurrent job limit enforcement.
     *
     * @returns The count, or 0 when the query fails (fail-open, logged).
     */
    async countActiveJobs(apiKeyPrefix) {
        try {
            const count = await this.client.fetch(`count(*[_type == $type && apiKeyPrefix == $prefix && status in ["received", "queued", "running"]])`, {
                prefix: apiKeyPrefix,
                type: JOB_TYPE,
            });
            return count;
        }
        catch (error) {
            console.warn(` ⚠️ Failed to count active jobs: ${JobStore.#describe(error)}`);
            return 0;
        }
    }
    /**
     * Find stale jobs that have exceeded their deadline.
     * Used by the timeout cron job.
     *
     * @returns Matching jobs, or an empty array when the query fails (logged).
     */
    async findStaleJobs() {
        try {
            const now = new Date().toISOString();
            const docs = await this.client.fetch(`*[_type == $type && status in ["received", "queued", "running"] && deadlineAt < $now]`, { now, type: JOB_TYPE });
            return docs.map(toJobDocument);
        }
        catch (error) {
            console.warn(` ⚠️ Failed to find stale jobs: ${JobStore.#describe(error)}`);
            return [];
        }
    }
    /**
     * Read a job by its job ID.
     *
     * @returns The job, or null when it does not exist or the query fails.
     */
    async readJob(jobId) {
        try {
            const doc = await this.client.fetch(`*[_type == $type && jobId == $id][0]`, { id: jobId, type: JOB_TYPE });
            return doc ? toJobDocument(doc) : null;
        }
        catch (error) {
            console.warn(` ⚠️ Failed to read job: ${JobStore.#describe(error)}`);
            return null;
        }
    }
    /**
     * Update a job's status and optional associated data.
     *
     * @param {string} jobId - Also the document `_id` being patched.
     * @param {object} update - Fields passed to the patch's `set()`.
     * @returns true when the patch committed, false on failure.
     */
    async updateJob(jobId, update) {
        try {
            await this.client.patch(jobId).set(update).commit();
            return true;
        }
        catch (error) {
            console.warn(` ⚠️ Failed to update job ${jobId}: ${JobStore.#describe(error)}`);
            return false;
        }
    }
}
153
+ // ---------------------------------------------------------------------------
154
+ // Helpers
155
+ // ---------------------------------------------------------------------------
156
+ /**
157
+ * Generate a job ID with "job_" prefix and random suffix.
158
+ *
159
+ * Uses crypto.randomUUID() for uniqueness, shortened for readability.
160
+ * The returned value is used directly as the Sanity document `_id`
161
+ * (no additional prefix wrapping).
162
+ *
163
+ * Example: "job_a1b2c3d4e5f6"
164
+ */
165
+ function generateJobId() {
166
+ const uuid = crypto.randomUUID().replace(/-/g, "");
167
+ return `job_${uuid.slice(0, 12)}`;
168
+ }
169
+ /**
170
+ * Convert a raw Sanity document to a typed JobDocument.
171
+ */
172
+ function toJobDocument(doc) {
173
+ return {
174
+ apiKeyPrefix: doc.apiKeyPrefix,
175
+ callback: doc.callback,
176
+ completedAt: doc.completedAt,
177
+ config: doc.config,
178
+ createdAt: doc.createdAt,
179
+ deadlineAt: doc.deadlineAt,
180
+ error: doc.error,
181
+ execution: doc.execution,
182
+ jobId: doc.jobId,
183
+ progress: doc.progress,
184
+ reportId: doc.reportId,
185
+ startedAt: doc.startedAt,
186
+ status: doc.status,
187
+ };
188
+ }
@@ -0,0 +1,8 @@
1
+ /**
2
+ * lib/agent-behavior-report.ts — DEPRECATED re-export shim.
3
+ * @deprecated Import from ../pipeline/agent-behavior-report.js instead.
4
+ */
5
+ import "dotenv/config";
6
+ export { analyzeResults, CANONICAL_DOC_MAP, detectFeatureArea, } from "../pipeline/agent-behavior-report.js";
7
+ export type { AnalysisResult, FeatureAnalysis, TaskBehavior, TestResult, } from "../pipeline/agent-behavior-report.js";
8
+ export declare function main(resultsPathArg?: string): void;
@@ -0,0 +1,185 @@
1
+ /**
2
+ * lib/agent-behavior-report.ts — DEPRECATED re-export shim.
3
+ * @deprecated Import from ../pipeline/agent-behavior-report.js instead.
4
+ */
5
+ // oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
6
+ import "dotenv/config";
7
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
8
+ import { dirname, join } from "path";
9
+ export { analyzeResults, CANONICAL_DOC_MAP, detectFeatureArea, } from "../pipeline/agent-behavior-report.js";
10
+ import { analyzeResults, } from "../pipeline/agent-behavior-report.js";
11
/**
 * CLI entry point: read an eval results file, print the agent behavior
 * report, and write a JSON summary to results/latest/agent-behavior-report.json.
 *
 * @param {string} [resultsPathArg] - Results file to read. Falls back to
 *   process.argv[2], then to <ROOT>/results/latest/eval-results.json.
 *
 * Exits the process with code 1 when the file is missing or has an
 * unrecognized shape, and with code 0 when it holds no behavior data.
 */
export function main(resultsPathArg) {
    // NOTE(review): URL#pathname is percent-encoded and not a valid Windows
    // path; url.fileURLToPath() would be more robust. Left unchanged here to
    // avoid adding an import.
    const ROOT = join(dirname(new URL(import.meta.url).pathname), "..", "..");
    const resultsPath = resultsPathArg ??
        process.argv[2] ??
        join(ROOT, "results", "latest", "eval-results.json");
    if (!existsSync(resultsPath)) {
        console.error(`Results file not found: ${resultsPath}`);
        console.error("Run an evaluation first: pnpm eval:observed");
        process.exit(1);
    }
    console.log(`Reading results from: ${resultsPath}`);
    console.log();
    const json = JSON.parse(readFileSync(resultsPath, "utf-8"));
    // Two layouts are accepted: `results` as the array itself, or
    // `results.results` as the array.
    const rawResults = Array.isArray(json?.results)
        ? json.results
        : json?.results?.results;
    if (!Array.isArray(rawResults)) {
        // Previously this surfaced as a raw TypeError on `json.results.results`.
        console.error(`Unrecognized results format in: ${resultsPath}`);
        console.error('Expected "results" to be an array or an object with a "results" array.');
        process.exit(1);
    }
    const analysis = analyzeResults(rawResults);
    if (!analysis.hasData) {
        console.log("No agent behavior data found in the results.");
        console.log("Make sure you ran the evaluation with the observed config:");
        console.log(" pnpm eval:observed");
        process.exit(0);
    }
    printReport(analysis);
    // Persist detailed report as JSON
    const outDir = join(ROOT, "results", "latest");
    mkdirSync(outDir, { recursive: true });
    const reportData = {
        features: analysis.features.map((f) => ({
            avgDocPages: f.avgDocPages,
            avgNetworkMs: f.avgNetworkMs,
            avgSearches: f.avgSearches,
            canonicalCoverage: f.canonicalCoverage,
            canonicalSlugs: f.canonicalSlugs,
            docSlugsVisited: f.allDocSlugs,
            externalDomains: f.allExternalDomains,
            feature: f.feature,
            searchQueries: f.allSearchQueries,
            taskCount: f.tasks.length,
        })),
        tasks: analysis.tasks.map((t) => ({
            behavior: t.behavior,
            description: t.description,
            feature: t.feature,
            hasDocs: t.hasDocs,
        })),
        timestamp: new Date().toISOString(),
        totalTasks: analysis.tasks.length,
    };
    writeFileSync(join(outDir, "agent-behavior-report.json"), JSON.stringify(reportData, null, 2));
    console.log("Agent behavior report written to results/latest/agent-behavior-report.json");
}
63
+ // ---------------------------------------------------------------------------
64
+ // Report output (kept in shim for backward compat)
65
+ // ---------------------------------------------------------------------------
66
/**
 * Print the agent behavior report to stdout.
 *
 * Sections, in order: overview table, canonical documentation coverage,
 * search strategy (only when any query exists), per-task detail,
 * external domains (only when any exist), and overall statistics.
 *
 * @param {object} analysis - Must provide `features` and `tasks` arrays
 *   with the fields read below.
 */
function printReport(analysis) {
    /** Print a section title followed by an 80-column dashed rule. */
    const heading = (title) => {
        console.log(title);
        console.log("-".repeat(80));
    };
    /**
     * Whole-number percentage of `part` in `whole`. A zero denominator
     * yields "0" instead of "NaN", matching the guard already used for the
     * canonical-coverage average.
     */
    const share = (part, whole) => (whole === 0 ? 0 : (part / whole) * 100).toFixed(0);
    console.log("=".repeat(80));
    console.log(" AGENT BEHAVIOR OBSERVATION REPORT");
    console.log("=".repeat(80));
    console.log();
    // ---- Overview table ----
    heading("OVERVIEW BY FEATURE AREA");
    const h = "| Feature Area | Tasks | Avg Docs | Avg Search | Avg Net(ms) | Canon% |";
    const sep = "|---------------------|-------|----------|------------|-------------|--------|";
    console.log(h);
    console.log(sep);
    for (const f of analysis.features) {
        console.log(`| ${f.feature.padEnd(19)} | ` +
            `${f.tasks.length.toString().padStart(5)} | ` +
            `${f.avgDocPages.toFixed(1).padStart(8)} | ` +
            `${f.avgSearches.toFixed(1).padStart(10)} | ` +
            `${Math.round(f.avgNetworkMs).toString().padStart(11)} | ` +
            `${(f.canonicalCoverage * 100).toFixed(0).padStart(5)}% |`);
    }
    console.log();
    // ---- Canonical coverage breakdown ----
    heading("CANONICAL DOCUMENTATION COVERAGE");
    console.log();
    for (const f of analysis.features) {
        console.log(` ${f.feature} (${(f.canonicalCoverage * 100).toFixed(0)}% canonical coverage):`);
        if (f.canonicalSlugs.length === 0) {
            console.log(" (no canonical docs defined)");
        }
        else {
            for (const slug of f.canonicalSlugs) {
                // A canonical slug counts as visited when it is a substring
                // of any visited slug.
                const found = f.allDocSlugs.some((visited) => visited.includes(slug));
                const marker = found ? "[x]" : "[ ]";
                console.log(` ${marker} ${slug}`);
            }
        }
        if (f.allDocSlugs.length > 0) {
            const nonCanonical = f.allDocSlugs.filter((slug) => !f.canonicalSlugs.some((c) => slug.includes(c)));
            if (nonCanonical.length > 0) {
                console.log(" Additional docs visited:");
                for (const slug of nonCanonical) {
                    console.log(` + ${slug}`);
                }
            }
        }
        console.log();
    }
    // ---- Search strategy ----
    const allSearches = analysis.features.flatMap((f) => f.allSearchQueries);
    if (allSearches.length > 0) {
        heading("SEARCH STRATEGY");
        console.log();
        for (const f of analysis.features) {
            if (f.allSearchQueries.length === 0)
                continue;
            console.log(` ${f.feature}:`);
            for (const q of f.allSearchQueries) {
                console.log(` -> "${q}"`);
            }
        }
        console.log();
    }
    // ---- Per-task detail ----
    heading("PER-TASK DETAIL");
    console.log();
    for (const f of analysis.features) {
        console.log(` ## ${f.feature}`);
        console.log();
        for (const t of f.tasks) {
            const variant = t.hasDocs ? "[gold]" : "[baseline]";
            console.log(` ${variant} ${t.description}`);
            console.log(` Requests: ${t.behavior.totalRequests} | ` +
                `Doc pages: ${t.behavior.docPagesVisited} | ` +
                `Searches: ${t.behavior.searchesPerformed} | ` +
                `External: ${t.behavior.externalRequestCount}`);
            if (t.behavior.docSlugsVisited.length > 0) {
                console.log(` Docs: ${t.behavior.docSlugsVisited.join(", ")}`);
            }
            if (t.behavior.uniqueSearchQueries.length > 0) {
                console.log(` Queries: ${t.behavior.uniqueSearchQueries.map((q) => `"${q}"`).join(", ")}`);
            }
            console.log();
        }
    }
    // ---- External domains ----
    const allDomains = [
        ...new Set(analysis.features.flatMap((f) => f.allExternalDomains)),
    ];
    if (allDomains.length > 0) {
        heading("EXTERNAL DOMAINS");
        console.log();
        for (const d of allDomains) {
            console.log(` - ${d}`);
        }
        console.log();
    }
    // ---- Summary stats ----
    heading("OVERALL STATISTICS");
    console.log();
    const totalTasks = analysis.tasks.length;
    const tasksUsingDocs = analysis.tasks.filter((t) => t.behavior.usedDocs).length;
    const tasksUsingSearch = analysis.tasks.filter((t) => t.behavior.usedSearch).length;
    const avgCanonical = analysis.features.reduce((s, f) => s + f.canonicalCoverage, 0) /
        (analysis.features.length || 1);
    console.log(` Total tasks observed: ${totalTasks}`);
    console.log(` Tasks that used docs: ${tasksUsingDocs}/${totalTasks} (${share(tasksUsingDocs, totalTasks)}%)`);
    console.log(` Tasks that used search: ${tasksUsingSearch}/${totalTasks} (${share(tasksUsingSearch, totalTasks)}%)`);
    console.log(` Avg canonical coverage: ${(avgCanonical * 100).toFixed(1)}%`);
    console.log();
}
181
// Run main() only when this file is the entry script (its path ends with
// the shim's own .ts or .js filename), never on import.
if (["agent-behavior-report.ts", "agent-behavior-report.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main();
}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * lib/baseline.ts — DEPRECATED re-export shim.
3
+ *
4
+ * The real implementation has moved to pipeline/baseline.ts.
5
+ * This shim preserves backward compatibility for:
6
+ * - Direct CLI invocation: `tsx src/lib/baseline.ts`
7
+ * - Test imports that haven't been updated yet
8
+ *
9
+ * TODO: Update all importers to use pipeline/baseline.ts, then delete this file.
10
+ *
11
+ * @deprecated Import from ../pipeline/baseline.js instead.
12
+ */
13
+ export type { BaselineMetadata, CompareResult, ScoreComparison, } from "../pipeline/baseline.js";
14
+ export declare function saveBaseline(tag?: string): {
15
+ success: boolean;
16
+ message: string;
17
+ };
18
+ export declare function compareBaseline(baselineFile?: string): import("./baseline.js").CompareResult;
19
+ export declare function listBaselines(): import("./baseline.js").BaselineMetadata[];
@@ -0,0 +1,153 @@
1
+ /**
2
+ * lib/baseline.ts — DEPRECATED re-export shim.
3
+ *
4
+ * The real implementation has moved to pipeline/baseline.ts.
5
+ * This shim preserves backward compatibility for:
6
+ * - Direct CLI invocation: `tsx src/lib/baseline.ts`
7
+ * - Test imports that haven't been updated yet
8
+ *
9
+ * TODO: Update all importers to use pipeline/baseline.ts, then delete this file.
10
+ *
11
+ * @deprecated Import from ../pipeline/baseline.js instead.
12
+ */
13
+ import { dirname, resolve } from "path";
14
+ import { fileURLToPath } from "url";
15
+ import { saveBaseline as _saveBaseline, compareBaseline as _compareBaseline, listBaselines as _listBaselines, } from "../pipeline/baseline.js";
16
+ const __dirname = dirname(fileURLToPath(import.meta.url));
17
+ const ROOT = resolve(__dirname, "../..");
18
/**
 * Backward-compatible wrapper: forwards to the pipeline implementation
 * with this package's ROOT as the root directory.
 *
 * @param {string} [tag] - Passed through unchanged.
 * @returns Whatever the pipeline `saveBaseline` returns.
 */
export function saveBaseline(tag) {
    const outcome = _saveBaseline(ROOT, tag);
    return outcome;
}
21
/**
 * Backward-compatible wrapper: forwards to the pipeline implementation
 * with this package's ROOT as the root directory.
 *
 * @param {string} [baselineFile] - Passed through unchanged.
 * @returns Whatever the pipeline `compareBaseline` returns.
 */
export function compareBaseline(baselineFile) {
    const comparison = _compareBaseline(ROOT, baselineFile);
    return comparison;
}
24
/**
 * Backward-compatible wrapper: forwards to the pipeline implementation
 * with this package's ROOT as the root directory.
 *
 * @returns Whatever the pipeline `listBaselines` returns.
 */
export function listBaselines() {
    const saved = _listBaselines(ROOT);
    return saved;
}
27
+ // ---------------------------------------------------------------------------
28
+ // CLI
29
+ // ---------------------------------------------------------------------------
30
// CLI entry: runs only when this shim is the entry script. The first
// argument selects the command (default "save"); flags are read as
// `--name value` pairs.
if (process.argv[1]?.endsWith("baseline.ts") ||
    process.argv[1]?.endsWith("baseline.js")) {
    const cliArgs = process.argv.slice(2);
    const subcommand = cliArgs[0] || "save";
    /** Value following `--<flag>`, or undefined when the flag is absent or last. */
    const readFlag = (flag) => {
        const at = cliArgs.indexOf(`--${flag}`);
        if (at === -1 || at + 1 >= cliArgs.length) {
            return undefined;
        }
        return cliArgs[at + 1];
    };
    /** Trend emoji for a delta: up, down, or flat (also used for NaN). */
    const trendIcon = (delta) => {
        if (delta > 0) {
            return "📈";
        }
        if (delta < 0) {
            return "📉";
        }
        return "➡️";
    };
    /** Score delta as "+n", "=", or the plain (negative) number. */
    const signedPoints = (delta) => {
        if (delta > 0) {
            return `+${delta}`;
        }
        if (delta === 0) {
            return "=";
        }
        return String(delta);
    };
    /** Column header and rule shared by the score and cost tables. */
    const printComparisonHeader = () => {
        console.log([
            " ",
            "Feature Area".padEnd(18),
            "Current".padEnd(10),
            "Baseline".padEnd(10),
            "Delta",
        ].join(""));
        console.log(" " + "-".repeat(50));
    };
    /** One table row; `current` and `baseline` are already strings. */
    const printComparisonRow = (feature, current, baseline, trend) => {
        console.log(" " +
            feature.padEnd(18) +
            current.padEnd(10) +
            baseline.padEnd(10) +
            trend);
    };
    const runCompare = () => {
        const baselineFile = readFlag("file");
        console.log("=== Baseline Comparison ===\n");
        const outcome = compareBaseline(baselineFile);
        if (!outcome.success) {
            console.error(` ❌ ${outcome.message}`);
            process.exit(1);
        }
        console.log(` ${outcome.message}\n`);
        printComparisonHeader();
        for (const row of outcome.comparisons) {
            const trend = `${trendIcon(row.delta)} ${signedPoints(row.delta)}`;
            printComparisonRow(row.feature, String(row.current), String(row.baseline), trend);
        }
        // Cost comparison (only if cost data exists)
        const costRows = outcome.comparisons.filter((row) => row.costCurrent !== undefined || row.costBaseline !== undefined);
        if (costRows.length > 0) {
            console.log();
            console.log(" Cost Comparison:");
            printComparisonHeader();
            for (const row of costRows) {
                const costDelta = row.costDelta ?? 0;
                let change = "=";
                if (costDelta > 0) {
                    change = `+$${costDelta.toFixed(4)}`;
                }
                else if (costDelta < 0) {
                    change = `-$${Math.abs(costDelta).toFixed(4)}`;
                }
                printComparisonRow(row.feature, `$${(row.costCurrent ?? 0).toFixed(4)}`, `$${(row.costBaseline ?? 0).toFixed(4)}`, `${trendIcon(costDelta)} ${change}`);
            }
        }
        console.log();
        console.log(` Overall: ${trendIcon(outcome.overallDelta)} ${signedPoints(outcome.overallDelta)} points`);
    };
    const runHistory = () => {
        console.log("=== Baseline History ===\n");
        const saved = listBaselines();
        if (saved.length === 0) {
            console.log(" No baselines saved yet.");
            return;
        }
        // The Cost column appears only when at least one baseline has cost data.
        const showCost = saved.some((entry) => entry.totalCost !== undefined || entry.graderCost !== undefined);
        console.log(" " +
            "Date".padEnd(22) +
            "Avg".padEnd(6) +
            "Areas".padEnd(7) +
            (showCost ? "Cost".padEnd(10) : "") +
            "Tag");
        console.log(" " + "-".repeat(showCost ? 60 : 50));
        for (const entry of saved) {
            const when = new Date(entry.timestamp).toLocaleString();
            const spend = (entry.totalCost ?? 0) + (entry.graderCost ?? 0);
            let costCell = "";
            if (showCost) {
                costCell = (spend > 0 ? `$${spend.toFixed(2)}` : "-").padEnd(10);
            }
            console.log(" " +
                when.padEnd(22) +
                String(entry.avgScore).padEnd(6) +
                String(entry.areaCount).padEnd(7) +
                costCell +
                (entry.tag ?? ""));
        }
    };
    const runSave = () => {
        const tag = readFlag("tag");
        console.log("=== Saving baseline snapshot ===\n");
        const outcome = saveBaseline(tag);
        if (outcome.success) {
            console.log(` ✅ ${outcome.message}`);
        }
        else {
            console.error(` ❌ ${outcome.message}`);
            process.exit(1);
        }
    };
    // A Map (not a plain object) so names like "toString" stay unknown commands.
    const handlers = new Map([
        ["compare", runCompare],
        ["history", runHistory],
        ["save", runSave],
    ]);
    const handler = handlers.get(subcommand);
    if (handler) {
        handler();
    }
    else {
        console.error(`Unknown command: "${subcommand}". Use: save, history, compare`);
        process.exit(1);
    }
}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * lib/calculate-scores.ts — DEPRECATED re-export shim.
3
+ *
4
+ * The real implementation has moved to pipeline/calculate-scores.ts.
5
+ * This shim preserves backward compatibility for:
6
+ * - Direct CLI invocation: `tsx src/lib/calculate-scores.ts`
7
+ * - Test imports that haven't been updated yet
8
+ *
9
+ * TODO: Update all importers to use pipeline/calculate-scores.ts, then delete this file.
10
+ *
11
+ * @deprecated Import from ../pipeline/calculate-scores.js instead.
12
+ */
13
+ export { calculateAndWriteScores, calculateScoresPerModel, extractGraderJudgments, scoreAgenticResults, type CalculateScoresOptions, type PromptfooResultsWrapper, type RawPromptfooFile, type RawTestResult, } from "../pipeline/calculate-scores.js";
14
// Re-export through the implementation specifier ("index.js") like the JS
// twin of this file does; TypeScript resolves it to the adjacent
// declaration file. A value re-export via an "index.d.ts" path is rejected
// by the compiler for consumers that type-check library declarations.
export { classifyRubric, detectFeatureArea, extractUrlMetadata, mergeScores, parseRubricScore, type ActualScoreEntry, type ComponentResult, type TestResult, type UrlMetadata, } from "../_vendor/ailf-core/index.js";
15
+ import type { CalculateScoresOptions } from "../pipeline/calculate-scores.js";
16
+ /**
17
+ * Legacy main() entry point — wraps calculateAndWriteScores() with env var fallbacks.
18
+ *
19
+ * @deprecated Use calculateAndWriteScores() from pipeline/calculate-scores.ts instead.
20
+ */
21
+ export declare function main(options?: Omit<CalculateScoresOptions, "rootDir"> & {
22
+ rootDir?: string;
23
+ }): void;
@@ -0,0 +1,42 @@
1
+ /**
2
+ * lib/calculate-scores.ts — DEPRECATED re-export shim.
3
+ *
4
+ * The real implementation has moved to pipeline/calculate-scores.ts.
5
+ * This shim preserves backward compatibility for:
6
+ * - Direct CLI invocation: `tsx src/lib/calculate-scores.ts`
7
+ * - Test imports that haven't been updated yet
8
+ *
9
+ * TODO: Update all importers to use pipeline/calculate-scores.ts, then delete this file.
10
+ *
11
+ * @deprecated Import from ../pipeline/calculate-scores.js instead.
12
+ */
13
+ import { dirname, join } from "path";
14
+ import { fileURLToPath } from "url";
15
+ // Re-export everything from the real implementation
16
+ export { calculateAndWriteScores, calculateScoresPerModel, extractGraderJudgments, scoreAgenticResults, } from "../pipeline/calculate-scores.js";
17
+ // Re-export core types/functions for backward compatibility
18
+ export { classifyRubric, detectFeatureArea, extractUrlMetadata, mergeScores, parseRubricScore, } from "../_vendor/ailf-core/index.js";
19
+ import { calculateAndWriteScores } from "../pipeline/calculate-scores.js";
20
+ const __dirname = dirname(fileURLToPath(import.meta.url));
21
+ const ROOT = join(__dirname, "..", "..");
22
+ /**
23
+ * Legacy main() entry point — wraps calculateAndWriteScores() with env var fallbacks.
24
+ *
25
+ * @deprecated Use calculateAndWriteScores() from pipeline/calculate-scores.ts instead.
26
+ */
27
+ export function main(options) {
28
+ calculateAndWriteScores({
29
+ rootDir: options?.rootDir ?? ROOT,
30
+ allowedOrigins: options?.allowedOrigins,
31
+ mode: options?.mode ?? process.env.EVAL_MODE ?? "baseline",
32
+ resolvedSource: options?.resolvedSource,
33
+ resultsPath: options?.resultsPath,
34
+ searchMode: options?.searchMode,
35
+ source: options?.source,
36
+ });
37
+ }
38
// Run main() only when this file is the entry script (its path ends with
// the shim's own .ts or .js filename), never when imported for testing.
if (["calculate-scores.ts", "calculate-scores.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main();
}
@@ -0,0 +1,18 @@
1
+ /**
2
+ * compare.ts
3
+ *
4
+ * CLI for structured comparison between two evaluation runs.
5
+ *
6
+ * Usage:
7
+ * pnpm compare # compare current vs latest baseline
8
+ * pnpm compare --baseline <path> # compare current vs specific file
9
+ * pnpm compare --baseline <path> --experiment <path> # compare two specific files
10
+ * pnpm compare --threshold 5 # custom noise threshold
11
+ * pnpm compare --output /tmp/comparison.json # write JSON report to file
12
+ * pnpm compare --format json # output raw JSON (default: table)
13
+ *
14
+ * Reads: results/latest/score-summary.json (as experiment, unless --experiment)
15
+ * Reads: results/baselines/<latest>.json (as baseline, unless --baseline)
16
+ */
17
// Re-export through the implementation specifier ("index.js"); TypeScript
// resolves it to the adjacent declaration file. A value re-export via an
// "index.d.ts" path is rejected by the compiler for consumers that
// type-check library declarations.
export { formatComparisonMarkdown, formatComparisonTable, } from "../_vendor/ailf-core/index.js";
18
+ export declare function main(): void;