@sanity/ailf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (530)
  1. package/README.md +89 -0
  2. package/bin/ailf.js +64 -0
  3. package/canonical/grader-references/README.md +88 -0
  4. package/canonical/grader-references/groq.yaml +234 -0
  5. package/canonical/grader-references/studio-setup.yaml +275 -0
  6. package/canonical/reference-solutions/.gitkeep +1 -0
  7. package/canonical/reference-solutions/frameworks/nuxt.ts +119 -0
  8. package/canonical/reference-solutions/frameworks/remix.tsx +100 -0
  9. package/canonical/reference-solutions/functions/publish-webhook.ts +60 -0
  10. package/canonical/reference-solutions/groq/advanced-filtering.ts +379 -0
  11. package/canonical/reference-solutions/groq/blog-queries.ts +137 -0
  12. package/canonical/reference-solutions/groq/joins-references.ts +300 -0
  13. package/canonical/reference-solutions/nextjs/app-router-integration.tsx +128 -0
  14. package/canonical/reference-solutions/studio-setup/blog-schema.ts +143 -0
  15. package/canonical/reference-solutions/studio-setup/custom-tool.tsx +78 -0
  16. package/canonical/reference-solutions/visual-editing/live-preview.tsx +137 -0
  17. package/canonical/reference-solutions/visual-editing/presentation-nextjs.tsx +130 -0
  18. package/config/airbyte/ai_literacy_framework.connector.yaml +639 -0
  19. package/config/bigquery/README.md +74 -0
  20. package/config/bigquery/views/area_scores.sql +87 -0
  21. package/config/bigquery/views/reports.sql +49 -0
  22. package/config/features.yaml +116 -0
  23. package/config/models.yaml +115 -0
  24. package/config/prompts.yaml +75 -0
  25. package/config/rubrics.yaml +62 -0
  26. package/config/schedules.yaml +43 -0
  27. package/config/sinks.yaml +54 -0
  28. package/config/sources.yaml +51 -0
  29. package/config/thresholds.yaml +49 -0
  30. package/dist/_vendor/ailf-core/examples/index.d.ts +190 -0
  31. package/dist/_vendor/ailf-core/examples/index.js +285 -0
  32. package/dist/_vendor/ailf-core/index.d.ts +17 -0
  33. package/dist/_vendor/ailf-core/index.js +17 -0
  34. package/dist/_vendor/ailf-core/ports/cache-store.d.ts +72 -0
  35. package/dist/_vendor/ailf-core/ports/cache-store.js +17 -0
  36. package/dist/_vendor/ailf-core/ports/config-source.d.ts +33 -0
  37. package/dist/_vendor/ailf-core/ports/config-source.js +15 -0
  38. package/dist/_vendor/ailf-core/ports/context.d.ts +172 -0
  39. package/dist/_vendor/ailf-core/ports/context.js +14 -0
  40. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +131 -0
  41. package/dist/_vendor/ailf-core/ports/doc-fetcher.js +12 -0
  42. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +24 -0
  43. package/dist/_vendor/ailf-core/ports/eval-runner.js +8 -0
  44. package/dist/_vendor/ailf-core/ports/index.d.ts +15 -0
  45. package/dist/_vendor/ailf-core/ports/index.js +7 -0
  46. package/dist/_vendor/ailf-core/ports/logger.d.ts +36 -0
  47. package/dist/_vendor/ailf-core/ports/logger.js +11 -0
  48. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +46 -0
  49. package/dist/_vendor/ailf-core/ports/pipeline-step.js +8 -0
  50. package/dist/_vendor/ailf-core/ports/task-source.d.ts +159 -0
  51. package/dist/_vendor/ailf-core/ports/task-source.js +72 -0
  52. package/dist/_vendor/ailf-core/schemas/callback-payload.d.ts +24 -0
  53. package/dist/_vendor/ailf-core/schemas/callback-payload.js +29 -0
  54. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +55 -0
  55. package/dist/_vendor/ailf-core/schemas/eval-config.js +78 -0
  56. package/dist/_vendor/ailf-core/schemas/index.d.ts +16 -0
  57. package/dist/_vendor/ailf-core/schemas/index.js +16 -0
  58. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +125 -0
  59. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +67 -0
  60. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +531 -0
  61. package/dist/_vendor/ailf-core/schemas/pipeline.js +318 -0
  62. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +68 -0
  63. package/dist/_vendor/ailf-core/schemas/schedules.js +74 -0
  64. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +207 -0
  65. package/dist/_vendor/ailf-core/schemas/sinks.js +108 -0
  66. package/dist/_vendor/ailf-core/services/comparison-formatters.d.ts +18 -0
  67. package/dist/_vendor/ailf-core/services/comparison-formatters.js +189 -0
  68. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +41 -0
  69. package/dist/_vendor/ailf-core/services/config-helpers.js +86 -0
  70. package/dist/_vendor/ailf-core/services/index.d.ts +12 -0
  71. package/dist/_vendor/ailf-core/services/index.js +12 -0
  72. package/dist/_vendor/ailf-core/services/scoring.d.ts +49 -0
  73. package/dist/_vendor/ailf-core/services/scoring.js +222 -0
  74. package/dist/_vendor/ailf-core/types/index.d.ts +1082 -0
  75. package/dist/_vendor/ailf-core/types/index.js +21 -0
  76. package/dist/_vendor/ailf-core/types/scoring-input.d.ts +54 -0
  77. package/dist/_vendor/ailf-core/types/scoring-input.js +9 -0
  78. package/dist/_vendor/ailf-shared/dimension-names.d.ts +21 -0
  79. package/dist/_vendor/ailf-shared/dimension-names.js +27 -0
  80. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -0
  81. package/dist/_vendor/ailf-shared/document-ref.js +1 -0
  82. package/dist/_vendor/ailf-shared/eval-modes.d.ts +12 -0
  83. package/dist/_vendor/ailf-shared/eval-modes.js +8 -0
  84. package/dist/_vendor/ailf-shared/index.d.ts +16 -0
  85. package/dist/_vendor/ailf-shared/index.js +16 -0
  86. package/dist/_vendor/ailf-shared/noise-threshold.d.ts +9 -0
  87. package/dist/_vendor/ailf-shared/noise-threshold.js +9 -0
  88. package/dist/_vendor/ailf-shared/score-grades.d.ts +17 -0
  89. package/dist/_vendor/ailf-shared/score-grades.js +23 -0
  90. package/dist/adapters/cache/content-lake-cache.d.ts +24 -0
  91. package/dist/adapters/cache/content-lake-cache.js +59 -0
  92. package/dist/adapters/cache/filesystem-cache.d.ts +18 -0
  93. package/dist/adapters/cache/filesystem-cache.js +54 -0
  94. package/dist/adapters/cache/index.d.ts +2 -0
  95. package/dist/adapters/cache/index.js +2 -0
  96. package/dist/adapters/config-sources/cli-config-adapter.d.ts +17 -0
  97. package/dist/adapters/config-sources/cli-config-adapter.js +23 -0
  98. package/dist/adapters/config-sources/file-config-adapter.d.ts +26 -0
  99. package/dist/adapters/config-sources/file-config-adapter.js +96 -0
  100. package/dist/adapters/config-sources/index.d.ts +2 -0
  101. package/dist/adapters/config-sources/index.js +2 -0
  102. package/dist/adapters/doc-fetchers/index.d.ts +1 -0
  103. package/dist/adapters/doc-fetchers/index.js +1 -0
  104. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +76 -0
  105. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +620 -0
  106. package/dist/adapters/eval-runners/index.d.ts +1 -0
  107. package/dist/adapters/eval-runners/index.js +1 -0
  108. package/dist/adapters/eval-runners/promptfoo-eval-adapter.d.ts +14 -0
  109. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +63 -0
  110. package/dist/adapters/index.d.ts +12 -0
  111. package/dist/adapters/index.js +12 -0
  112. package/dist/adapters/loggers/console-logger.d.ts +22 -0
  113. package/dist/adapters/loggers/console-logger.js +54 -0
  114. package/dist/adapters/loggers/index.d.ts +9 -0
  115. package/dist/adapters/loggers/index.js +9 -0
  116. package/dist/adapters/loggers/json-logger.d.ts +18 -0
  117. package/dist/adapters/loggers/json-logger.js +33 -0
  118. package/dist/adapters/loggers/quiet-logger.d.ts +16 -0
  119. package/dist/adapters/loggers/quiet-logger.js +30 -0
  120. package/dist/adapters/task-sources/composite-task-source.d.ts +20 -0
  121. package/dist/adapters/task-sources/composite-task-source.js +59 -0
  122. package/dist/adapters/task-sources/content-lake-task-source.d.ts +20 -0
  123. package/dist/adapters/task-sources/content-lake-task-source.js +219 -0
  124. package/dist/adapters/task-sources/index.d.ts +7 -0
  125. package/dist/adapters/task-sources/index.js +7 -0
  126. package/dist/adapters/task-sources/repo-schemas.d.ts +245 -0
  127. package/dist/adapters/task-sources/repo-schemas.js +234 -0
  128. package/dist/adapters/task-sources/repo-task-source.d.ts +22 -0
  129. package/dist/adapters/task-sources/repo-task-source.js +104 -0
  130. package/dist/adapters/task-sources/repo-trigger.d.ts +52 -0
  131. package/dist/adapters/task-sources/repo-trigger.js +153 -0
  132. package/dist/adapters/task-sources/repo-validation.d.ts +49 -0
  133. package/dist/adapters/task-sources/repo-validation.js +164 -0
  134. package/dist/adapters/task-sources/yaml-task-source.d.ts +18 -0
  135. package/dist/adapters/task-sources/yaml-task-source.js +136 -0
  136. package/dist/agent-observer/agentic-provider.d.ts +132 -0
  137. package/dist/agent-observer/agentic-provider.js +983 -0
  138. package/dist/agent-observer/classifier.d.ts +62 -0
  139. package/dist/agent-observer/classifier.js +269 -0
  140. package/dist/agent-observer/index.d.ts +7 -0
  141. package/dist/agent-observer/index.js +4 -0
  142. package/dist/agent-observer/pricing.d.ts +35 -0
  143. package/dist/agent-observer/pricing.js +82 -0
  144. package/dist/agent-observer/provider.d.ts +77 -0
  145. package/dist/agent-observer/provider.js +151 -0
  146. package/dist/agent-observer/proxy.d.ts +91 -0
  147. package/dist/agent-observer/proxy.js +321 -0
  148. package/dist/agent-observer/test-imports.d.ts +7 -0
  149. package/dist/agent-observer/test-imports.js +185 -0
  150. package/dist/agent-observer/types.d.ts +137 -0
  151. package/dist/agent-observer/types.js +16 -0
  152. package/dist/assertions/source-isolation.d.ts +72 -0
  153. package/dist/assertions/source-isolation.js +117 -0
  154. package/dist/cli.d.ts +24 -0
  155. package/dist/cli.js +199 -0
  156. package/dist/commands/agent-report.d.ts +5 -0
  157. package/dist/commands/agent-report.js +69 -0
  158. package/dist/commands/baseline.d.ts +9 -0
  159. package/dist/commands/baseline.js +141 -0
  160. package/dist/commands/cache.d.ts +13 -0
  161. package/dist/commands/cache.js +135 -0
  162. package/dist/commands/calculate-scores.d.ts +8 -0
  163. package/dist/commands/calculate-scores.js +48 -0
  164. package/dist/commands/compare.d.ts +8 -0
  165. package/dist/commands/compare.js +120 -0
  166. package/dist/commands/completion.d.ts +18 -0
  167. package/dist/commands/completion.js +260 -0
  168. package/dist/commands/coverage-audit.d.ts +7 -0
  169. package/dist/commands/coverage-audit.js +40 -0
  170. package/dist/commands/discovery-report.d.ts +10 -0
  171. package/dist/commands/discovery-report.js +44 -0
  172. package/dist/commands/eval.d.ts +9 -0
  173. package/dist/commands/eval.js +35 -0
  174. package/dist/commands/explain-handler.d.ts +34 -0
  175. package/dist/commands/explain-handler.js +719 -0
  176. package/dist/commands/fetch-docs.d.ts +8 -0
  177. package/dist/commands/fetch-docs.js +128 -0
  178. package/dist/commands/generate-configs.d.ts +8 -0
  179. package/dist/commands/generate-configs.js +46 -0
  180. package/dist/commands/grader/index.d.ts +11 -0
  181. package/dist/commands/grader/index.js +118 -0
  182. package/dist/commands/init.d.ts +19 -0
  183. package/dist/commands/init.js +150 -0
  184. package/dist/commands/interactive.d.ts +12 -0
  185. package/dist/commands/interactive.js +238 -0
  186. package/dist/commands/lookup-doc.d.ts +15 -0
  187. package/dist/commands/lookup-doc.js +84 -0
  188. package/dist/commands/measure-retrieval.d.ts +5 -0
  189. package/dist/commands/measure-retrieval.js +65 -0
  190. package/dist/commands/pipeline-action.d.ts +71 -0
  191. package/dist/commands/pipeline-action.js +305 -0
  192. package/dist/commands/pipeline.d.ts +62 -0
  193. package/dist/commands/pipeline.js +53 -0
  194. package/dist/commands/pr-comment.d.ts +8 -0
  195. package/dist/commands/pr-comment.js +47 -0
  196. package/dist/commands/publish.d.ts +26 -0
  197. package/dist/commands/publish.js +253 -0
  198. package/dist/commands/readiness-report.d.ts +10 -0
  199. package/dist/commands/readiness-report.js +104 -0
  200. package/dist/commands/shared/options.d.ts +29 -0
  201. package/dist/commands/shared/options.js +57 -0
  202. package/dist/commands/update-quality-scores.d.ts +5 -0
  203. package/dist/commands/update-quality-scores.js +20 -0
  204. package/dist/commands/validate-tasks.d.ts +16 -0
  205. package/dist/commands/validate-tasks.js +93 -0
  206. package/dist/commands/validate.d.ts +9 -0
  207. package/dist/commands/validate.js +73 -0
  208. package/dist/commands/webhook-server.d.ts +5 -0
  209. package/dist/commands/webhook-server.js +30 -0
  210. package/dist/commands/weekly-digest.d.ts +10 -0
  211. package/dist/commands/weekly-digest.js +104 -0
  212. package/dist/composition-root.d.ts +26 -0
  213. package/dist/composition-root.js +107 -0
  214. package/dist/interpolate.d.ts +26 -0
  215. package/dist/interpolate.js +70 -0
  216. package/dist/job-store.d.ts +104 -0
  217. package/dist/job-store.js +188 -0
  218. package/dist/lib/agent-behavior-report.d.ts +8 -0
  219. package/dist/lib/agent-behavior-report.js +185 -0
  220. package/dist/lib/baseline.d.ts +19 -0
  221. package/dist/lib/baseline.js +153 -0
  222. package/dist/lib/calculate-scores.d.ts +23 -0
  223. package/dist/lib/calculate-scores.js +42 -0
  224. package/dist/lib/compare.d.ts +18 -0
  225. package/dist/lib/compare.js +170 -0
  226. package/dist/lib/coverage-audit.d.ts +4 -0
  227. package/dist/lib/coverage-audit.js +42 -0
  228. package/dist/lib/discovery-report.d.ts +13 -0
  229. package/dist/lib/discovery-report.js +57 -0
  230. package/dist/lib/fetch-docs.d.ts +30 -0
  231. package/dist/lib/fetch-docs.js +171 -0
  232. package/dist/lib/generate-configs.d.ts +25 -0
  233. package/dist/lib/generate-configs.js +42 -0
  234. package/dist/lib/grader-api.d.ts +21 -0
  235. package/dist/lib/grader-api.js +34 -0
  236. package/dist/lib/grader-compare.d.ts +19 -0
  237. package/dist/lib/grader-compare.js +91 -0
  238. package/dist/lib/grader-consistency.d.ts +27 -0
  239. package/dist/lib/grader-consistency.js +79 -0
  240. package/dist/lib/grader-sensitivity.d.ts +19 -0
  241. package/dist/lib/grader-sensitivity.js +75 -0
  242. package/dist/lib/grader-validate.d.ts +19 -0
  243. package/dist/lib/grader-validate.js +78 -0
  244. package/dist/lib/measure-retrieval.d.ts +14 -0
  245. package/dist/lib/measure-retrieval.js +71 -0
  246. package/dist/lib/pr-comment.d.ts +16 -0
  247. package/dist/lib/pr-comment.js +28 -0
  248. package/dist/lib/readiness-report.d.ts +13 -0
  249. package/dist/lib/readiness-report.js +108 -0
  250. package/dist/lib/webhook-server.d.ts +11 -0
  251. package/dist/lib/webhook-server.js +24 -0
  252. package/dist/lib/weekly-digest.d.ts +24 -0
  253. package/dist/lib/weekly-digest.js +148 -0
  254. package/dist/orchestration/build-app-context.d.ts +27 -0
  255. package/dist/orchestration/build-app-context.js +81 -0
  256. package/dist/orchestration/build-step-sequence.d.ts +15 -0
  257. package/dist/orchestration/build-step-sequence.js +84 -0
  258. package/dist/orchestration/config-to-source-overrides.d.ts +9 -0
  259. package/dist/orchestration/config-to-source-overrides.js +28 -0
  260. package/dist/orchestration/env-bridge.d.ts +21 -0
  261. package/dist/orchestration/env-bridge.js +66 -0
  262. package/dist/orchestration/index.d.ts +11 -0
  263. package/dist/orchestration/index.js +11 -0
  264. package/dist/orchestration/pipeline-orchestrator.d.ts +24 -0
  265. package/dist/orchestration/pipeline-orchestrator.js +153 -0
  266. package/dist/orchestration/step-runner.d.ts +20 -0
  267. package/dist/orchestration/step-runner.js +88 -0
  268. package/dist/orchestration/steps/calculate-scores-step.d.ts +13 -0
  269. package/dist/orchestration/steps/calculate-scores-step.js +95 -0
  270. package/dist/orchestration/steps/callback-step.d.ts +24 -0
  271. package/dist/orchestration/steps/callback-step.js +76 -0
  272. package/dist/orchestration/steps/compare-step.d.ts +14 -0
  273. package/dist/orchestration/steps/compare-step.js +92 -0
  274. package/dist/orchestration/steps/discovery-report-step.d.ts +13 -0
  275. package/dist/orchestration/steps/discovery-report-step.js +55 -0
  276. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  277. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  278. package/dist/orchestration/steps/fetch-docs-step.d.ts +14 -0
  279. package/dist/orchestration/steps/fetch-docs-step.js +135 -0
  280. package/dist/orchestration/steps/gap-analysis-step.d.ts +16 -0
  281. package/dist/orchestration/steps/gap-analysis-step.js +136 -0
  282. package/dist/orchestration/steps/generate-configs-step.d.ts +14 -0
  283. package/dist/orchestration/steps/generate-configs-step.js +85 -0
  284. package/dist/orchestration/steps/grader-consistency-step.d.ts +13 -0
  285. package/dist/orchestration/steps/grader-consistency-step.js +64 -0
  286. package/dist/orchestration/steps/index.d.ts +19 -0
  287. package/dist/orchestration/steps/index.js +19 -0
  288. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +21 -0
  289. package/dist/orchestration/steps/mirror-repo-tasks-step.js +94 -0
  290. package/dist/orchestration/steps/publish-report-step.d.ts +26 -0
  291. package/dist/orchestration/steps/publish-report-step.js +216 -0
  292. package/dist/orchestration/steps/readiness-step.d.ts +13 -0
  293. package/dist/orchestration/steps/readiness-step.js +91 -0
  294. package/dist/orchestration/steps/report-step.d.ts +12 -0
  295. package/dist/orchestration/steps/report-step.js +49 -0
  296. package/dist/orchestration/steps/run-eval-step.d.ts +17 -0
  297. package/dist/orchestration/steps/run-eval-step.js +195 -0
  298. package/dist/orchestration/steps/validate-step.d.ts +12 -0
  299. package/dist/orchestration/steps/validate-step.js +41 -0
  300. package/dist/pipeline/agent-behavior-report.d.ts +53 -0
  301. package/dist/pipeline/agent-behavior-report.js +132 -0
  302. package/dist/pipeline/attribution.d.ts +47 -0
  303. package/dist/pipeline/attribution.js +226 -0
  304. package/dist/pipeline/baseline.d.ts +37 -0
  305. package/dist/pipeline/baseline.js +141 -0
  306. package/dist/pipeline/cache.d.ts +101 -0
  307. package/dist/pipeline/cache.js +283 -0
  308. package/dist/pipeline/calculate-scores.d.ts +102 -0
  309. package/dist/pipeline/calculate-scores.js +1128 -0
  310. package/dist/pipeline/callback-delivery.d.ts +50 -0
  311. package/dist/pipeline/callback-delivery.js +89 -0
  312. package/dist/pipeline/checks.d.ts +39 -0
  313. package/dist/pipeline/checks.js +280 -0
  314. package/dist/pipeline/classify-url.d.ts +61 -0
  315. package/dist/pipeline/classify-url.js +93 -0
  316. package/dist/pipeline/compare.d.ts +31 -0
  317. package/dist/pipeline/compare.js +208 -0
  318. package/dist/pipeline/coverage-audit.d.ts +39 -0
  319. package/dist/pipeline/coverage-audit.js +165 -0
  320. package/dist/pipeline/degradations.d.ts +85 -0
  321. package/dist/pipeline/degradations.js +242 -0
  322. package/dist/pipeline/discovery-report.d.ts +55 -0
  323. package/dist/pipeline/discovery-report.js +178 -0
  324. package/dist/pipeline/eval-constants.d.ts +68 -0
  325. package/dist/pipeline/eval-constants.js +111 -0
  326. package/dist/pipeline/eval-fingerprint.d.ts +66 -0
  327. package/dist/pipeline/eval-fingerprint.js +175 -0
  328. package/dist/pipeline/expand-tasks.d.ts +220 -0
  329. package/dist/pipeline/expand-tasks.js +421 -0
  330. package/dist/pipeline/failure-modes.d.ts +46 -0
  331. package/dist/pipeline/failure-modes.js +348 -0
  332. package/dist/pipeline/fetch-url-content.d.ts +44 -0
  333. package/dist/pipeline/fetch-url-content.js +93 -0
  334. package/dist/pipeline/gap-analysis.d.ts +48 -0
  335. package/dist/pipeline/gap-analysis.js +231 -0
  336. package/dist/pipeline/generate-configs.d.ts +72 -0
  337. package/dist/pipeline/generate-configs.js +395 -0
  338. package/dist/pipeline/grader-api.d.ts +49 -0
  339. package/dist/pipeline/grader-api.js +200 -0
  340. package/dist/pipeline/grader-compare-runner.d.ts +44 -0
  341. package/dist/pipeline/grader-compare-runner.js +301 -0
  342. package/dist/pipeline/grader-comparison.d.ts +111 -0
  343. package/dist/pipeline/grader-comparison.js +161 -0
  344. package/dist/pipeline/grader-consistency-runner.d.ts +60 -0
  345. package/dist/pipeline/grader-consistency-runner.js +270 -0
  346. package/dist/pipeline/grader-consistency.d.ts +103 -0
  347. package/dist/pipeline/grader-consistency.js +146 -0
  348. package/dist/pipeline/grader-sensitivity-runner.d.ts +40 -0
  349. package/dist/pipeline/grader-sensitivity-runner.js +282 -0
  350. package/dist/pipeline/grader-sensitivity.d.ts +94 -0
  351. package/dist/pipeline/grader-sensitivity.js +144 -0
  352. package/dist/pipeline/grader-validate-runner.d.ts +38 -0
  353. package/dist/pipeline/grader-validate-runner.js +229 -0
  354. package/dist/pipeline/grader-validation.d.ts +107 -0
  355. package/dist/pipeline/grader-validation.js +169 -0
  356. package/dist/pipeline/map-request-to-config.d.ts +19 -0
  357. package/dist/pipeline/map-request-to-config.js +80 -0
  358. package/dist/pipeline/measure-retrieval.d.ts +59 -0
  359. package/dist/pipeline/measure-retrieval.js +111 -0
  360. package/dist/pipeline/mirror-repo-tasks.d.ts +86 -0
  361. package/dist/pipeline/mirror-repo-tasks.js +350 -0
  362. package/dist/pipeline/plan-format.d.ts +33 -0
  363. package/dist/pipeline/plan-format.js +202 -0
  364. package/dist/pipeline/plan.d.ts +169 -0
  365. package/dist/pipeline/plan.js +708 -0
  366. package/dist/pipeline/pr-comment.d.ts +19 -0
  367. package/dist/pipeline/pr-comment.js +502 -0
  368. package/dist/pipeline/probe.d.ts +52 -0
  369. package/dist/pipeline/probe.js +390 -0
  370. package/dist/pipeline/provenance.d.ts +47 -0
  371. package/dist/pipeline/provenance.js +146 -0
  372. package/dist/pipeline/readiness-report.d.ts +87 -0
  373. package/dist/pipeline/readiness-report.js +205 -0
  374. package/dist/pipeline/release-classification.d.ts +54 -0
  375. package/dist/pipeline/release-classification.js +238 -0
  376. package/dist/pipeline/release-report.d.ts +37 -0
  377. package/dist/pipeline/release-report.js +222 -0
  378. package/dist/pipeline/repo-eval-comment.d.ts +37 -0
  379. package/dist/pipeline/repo-eval-comment.js +165 -0
  380. package/dist/pipeline/repo-threshold-evaluator.d.ts +89 -0
  381. package/dist/pipeline/repo-threshold-evaluator.js +162 -0
  382. package/dist/pipeline/resolve-mappings.d.ts +35 -0
  383. package/dist/pipeline/resolve-mappings.js +72 -0
  384. package/dist/pipeline/retrieval-metrics.d.ts +39 -0
  385. package/dist/pipeline/retrieval-metrics.js +136 -0
  386. package/dist/pipeline/reverse-mapping.d.ts +67 -0
  387. package/dist/pipeline/reverse-mapping.js +88 -0
  388. package/dist/pipeline/schemas.d.ts +9 -0
  389. package/dist/pipeline/schemas.js +9 -0
  390. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  391. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  392. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  393. package/dist/pipeline/steps/compare-step.js +90 -0
  394. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  395. package/dist/pipeline/steps/eval-step.js +347 -0
  396. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  397. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  398. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  399. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  400. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  401. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  402. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  403. package/dist/pipeline/steps/publish-report-step.js +243 -0
  404. package/dist/pipeline/steps/report-step.d.ts +13 -0
  405. package/dist/pipeline/steps/report-step.js +56 -0
  406. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  407. package/dist/pipeline/steps/update-scores-step.js +42 -0
  408. package/dist/pipeline/targeted-loo.d.ts +88 -0
  409. package/dist/pipeline/targeted-loo.js +203 -0
  410. package/dist/pipeline/thresholds.d.ts +27 -0
  411. package/dist/pipeline/thresholds.js +245 -0
  412. package/dist/pipeline/types.d.ts +10 -0
  413. package/dist/pipeline/types.js +10 -0
  414. package/dist/pipeline/validate.d.ts +67 -0
  415. package/dist/pipeline/validate.js +406 -0
  416. package/dist/pipeline/webhook-server.d.ts +37 -0
  417. package/dist/pipeline/webhook-server.js +133 -0
  418. package/dist/report-store.d.ts +84 -0
  419. package/dist/report-store.js +208 -0
  420. package/dist/sanity/client.d.ts +38 -0
  421. package/dist/sanity/client.js +86 -0
  422. package/dist/sanity/portable-text.d.ts +11 -0
  423. package/dist/sanity/portable-text.js +211 -0
  424. package/dist/sanity/queries.d.ts +133 -0
  425. package/dist/sanity/queries.js +300 -0
  426. package/dist/schedules/digest.d.ts +116 -0
  427. package/dist/schedules/digest.js +156 -0
  428. package/dist/schedules/index.d.ts +12 -0
  429. package/dist/schedules/index.js +10 -0
  430. package/dist/schedules/loader.d.ts +31 -0
  431. package/dist/schedules/loader.js +73 -0
  432. package/dist/schedules/schema.d.ts +9 -0
  433. package/dist/schedules/schema.js +9 -0
  434. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  435. package/dist/scripts/agent-behavior-report.js +315 -0
  436. package/dist/scripts/baseline.d.ts +43 -0
  437. package/dist/scripts/baseline.js +267 -0
  438. package/dist/scripts/calculate-scores.d.ts +166 -0
  439. package/dist/scripts/calculate-scores.js +1296 -0
  440. package/dist/scripts/compare.d.ts +22 -0
  441. package/dist/scripts/compare.js +334 -0
  442. package/dist/scripts/coverage-audit.d.ts +44 -0
  443. package/dist/scripts/coverage-audit.js +209 -0
  444. package/dist/scripts/debug-eval.d.ts +19 -0
  445. package/dist/scripts/debug-eval.js +73 -0
  446. package/dist/scripts/discovery-report.d.ts +58 -0
  447. package/dist/scripts/discovery-report.js +250 -0
  448. package/dist/scripts/fetch-docs.d.ts +35 -0
  449. package/dist/scripts/fetch-docs.js +472 -0
  450. package/dist/scripts/generate-configs.d.ts +66 -0
  451. package/dist/scripts/generate-configs.js +459 -0
  452. package/dist/scripts/grader-api.d.ts +27 -0
  453. package/dist/scripts/grader-api.js +206 -0
  454. package/dist/scripts/grader-compare.d.ts +22 -0
  455. package/dist/scripts/grader-compare.js +368 -0
  456. package/dist/scripts/grader-consistency.d.ts +20 -0
  457. package/dist/scripts/grader-consistency.js +313 -0
  458. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  459. package/dist/scripts/grader-sensitivity.js +354 -0
  460. package/dist/scripts/grader-validate.d.ts +19 -0
  461. package/dist/scripts/grader-validate.js +267 -0
  462. package/dist/scripts/measure-retrieval.d.ts +10 -0
  463. package/dist/scripts/measure-retrieval.js +145 -0
  464. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +24 -0
  465. package/dist/scripts/migrate-tasks-to-content-lake.js +327 -0
  466. package/dist/scripts/pipeline.d.ts +76 -0
  467. package/dist/scripts/pipeline.js +1031 -0
  468. package/dist/scripts/pr-comment.d.ts +10 -0
  469. package/dist/scripts/pr-comment.js +510 -0
  470. package/dist/scripts/readiness-report.d.ts +88 -0
  471. package/dist/scripts/readiness-report.js +342 -0
  472. package/dist/scripts/update-quality-scores.d.ts +15 -0
  473. package/dist/scripts/update-quality-scores.js +184 -0
  474. package/dist/scripts/validate-task-sources.d.ts +21 -0
  475. package/dist/scripts/validate-task-sources.js +210 -0
  476. package/dist/scripts/validate.d.ts +13 -0
  477. package/dist/scripts/validate.js +79 -0
  478. package/dist/scripts/webhook-server.d.ts +26 -0
  479. package/dist/scripts/webhook-server.js +147 -0
  480. package/dist/scripts/weekly-digest.d.ts +24 -0
  481. package/dist/scripts/weekly-digest.js +144 -0
  482. package/dist/sinks/bigquery/index.d.ts +131 -0
  483. package/dist/sinks/bigquery/index.js +222 -0
  484. package/dist/sinks/format-slack.d.ts +64 -0
  485. package/dist/sinks/format-slack.js +306 -0
  486. package/dist/sinks/index.d.ts +23 -0
  487. package/dist/sinks/index.js +18 -0
  488. package/dist/sinks/loader.d.ts +18 -0
  489. package/dist/sinks/loader.js +82 -0
  490. package/dist/sinks/retry.d.ts +24 -0
  491. package/dist/sinks/retry.js +52 -0
  492. package/dist/sinks/schema.d.ts +9 -0
  493. package/dist/sinks/schema.js +9 -0
  494. package/dist/sinks/slack/format.d.ts +65 -0
  495. package/dist/sinks/slack/format.js +327 -0
  496. package/dist/sinks/slack/index.d.ts +27 -0
  497. package/dist/sinks/slack/index.js +78 -0
  498. package/dist/sinks/slack-sink.d.ts +27 -0
  499. package/dist/sinks/slack-sink.js +78 -0
  500. package/dist/sinks/types.d.ts +59 -0
  501. package/dist/sinks/types.js +44 -0
  502. package/dist/sinks/webhook/index.d.ts +19 -0
  503. package/dist/sinks/webhook/index.js +50 -0
  504. package/dist/sinks/webhook-sink.d.ts +19 -0
  505. package/dist/sinks/webhook-sink.js +50 -0
  506. package/dist/sources.d.ts +104 -0
  507. package/dist/sources.js +292 -0
  508. package/dist/webhook/budget.d.ts +42 -0
  509. package/dist/webhook/budget.js +60 -0
  510. package/dist/webhook/debounce.d.ts +67 -0
  511. package/dist/webhook/debounce.js +76 -0
  512. package/dist/webhook/dispatch.d.ts +45 -0
  513. package/dist/webhook/dispatch.js +84 -0
  514. package/dist/webhook/eval-request-handler.d.ts +87 -0
  515. package/dist/webhook/eval-request-handler.js +181 -0
  516. package/dist/webhook/handler.d.ts +88 -0
  517. package/dist/webhook/handler.js +203 -0
  518. package/dist/webhook/index.d.ts +17 -0
  519. package/dist/webhook/index.js +12 -0
  520. package/dist/webhook/types.d.ts +109 -0
  521. package/dist/webhook/types.js +10 -0
  522. package/package.json +72 -0
  523. package/tasks/.expanded.agentic.yaml +51 -0
  524. package/tasks/.expanded.yaml +66 -0
  525. package/tasks/frameworks.yaml +98 -0
  526. package/tasks/functions.yaml +51 -0
  527. package/tasks/groq.yaml +216 -0
  528. package/tasks/nextjs-live.yaml +62 -0
  529. package/tasks/studio-setup.yaml +111 -0
  530. package/tasks/visual-editing.yaml +120 -0
@@ -0,0 +1,8 @@
1
+ /**
2
+ * fetch-docs command — pull documentation from Sanity CMS.
3
+ *
4
+ * Uses the composition root to wire adapters, then delegates to
5
+ * ctx.docFetcher (SanityDocFetcher) — the same code path as the pipeline.
6
+ */
7
+ import { Command } from "commander";
8
+ export declare function createFetchDocsCommand(): Command;
@@ -0,0 +1,128 @@
1
+ /**
2
+ * fetch-docs command — pull documentation from Sanity CMS.
3
+ *
4
+ * Uses the composition root to wire adapters, then delegates to
5
+ * ctx.docFetcher (SanityDocFetcher) — the same code path as the pipeline.
6
+ */
7
+ import { mkdirSync, writeFileSync } from "fs";
8
+ import { dirname, join, resolve } from "path";
9
+ import { fileURLToPath } from "url";
10
+ import { Command } from "commander";
11
+ import { createAppContext } from "../composition-root.js";
12
+ import { loadSource } from "../sources.js";
13
+ import { configToSourceOverrides } from "../orchestration/config-to-source-overrides.js";
14
+ import { addSanitySourceOptions } from "./shared/options.js";
15
+ const __dirname = dirname(fileURLToPath(import.meta.url));
16
+ const ROOT = resolve(__dirname, "..", "..");
17
/**
 * Build the `fetch-docs` CLI command.
 *
 * Registers the source and opt-in context flags, attaches the shared Sanity
 * source options, and wires the action to executeFetchDocs().
 *
 * @returns The configured commander Command.
 */
export function createFetchDocsCommand() {
    // A failure marks the process as failed without forcing an exit; only
    // Error instances have their message printed.
    const runAction = async (opts) => {
        try {
            await executeFetchDocs(opts);
        }
        catch (failure) {
            process.exitCode = 1;
            if (failure instanceof Error) {
                console.error(failure.message);
            }
        }
    };
    const fetchDocs = new Command("fetch-docs");
    fetchDocs.description("Fetch documentation contexts from Sanity CMS");
    fetchDocs.option("-s, --source <name>", "Documentation source name (from sources.yaml)");
    fetchDocs.option("--include-feature-areas", "Generate feature-area context files", false);
    fetchDocs.option("--include-corpus", "Generate full corpus context file", false);
    fetchDocs.action(runAction);
    addSanitySourceOptions(fetchDocs);
    return fetchDocs;
}
36
+ // ---------------------------------------------------------------------------
37
+ // Implementation
38
+ // ---------------------------------------------------------------------------
39
/**
 * Run the fetch-docs workflow.
 *
 * Builds an app context through the composition root, resolves the
 * documentation source, prints a summary of it, and then drives
 * ctx.docFetcher: feature-area contexts (opt-in), canonical contexts for
 * every task that declares canonical docs, and the full corpus (opt-in).
 *
 * @param opts Parsed CLI options; reads `source`, `includeFeatureAreas`
 *   and `includeCorpus`.
 */
async function executeFetchDocs(opts) {
    console.log("=== ai-literacy-framework — Documentation Fetcher ===\n");
    // Minimal config for the composition root: fetching on, everything else off.
    const appContext = createAppContext({
        rootDir: ROOT,
        mode: "baseline",
        skipFetch: false,
        skipEval: true,
        compareEnabled: false,
        gapAnalysisEnabled: false,
        readinessEnabled: false,
        discoveryReportEnabled: false,
        publishEnabled: false,
        noCache: true,
        noRemoteCache: true,
        searchMode: "open",
        source: opts.source,
    });
    // Resolve the source using overrides derived from the resolved config.
    const sourceOverrides = configToSourceOverrides(appContext.config);
    const resolvedSource = loadSource(appContext.config.source, sourceOverrides);
    // Source summary; optional fields are printed only when present.
    console.log(` Source: ${resolvedSource.name}`);
    console.log(` Base URL: ${resolvedSource.baseUrl}`);
    if (resolvedSource.dataset) {
        console.log(` Dataset: ${resolvedSource.dataset}`);
    }
    if (resolvedSource.perspective) {
        console.log(` Perspective: ${resolvedSource.perspective}`);
    }
    const documentIdCount = resolvedSource.documentIds
        ? resolvedSource.documentIds.length
        : 0;
    if (documentIdCount > 0) {
        console.log(` Documents: ${documentIdCount} document ID(s)`);
    }
    const urlCount = resolvedSource.urls.length;
    if (urlCount > 0) {
        console.log(` URLs: ${urlCount} direct URL(s)`);
    }
    console.log();
    // The doc fetcher wired by the composition root also exposes the
    // feature-area and corpus methods used below.
    const docFetcher = appContext.docFetcher;
    // Feature-area contexts (opt-in)
    if (opts.includeFeatureAreas) {
        await docFetcher.fetchFeatureAreaContexts(resolvedSource);
    }
    // Canonical contexts: only tasks that list at least one canonical doc.
    const allTasks = await appContext.taskSource.loadTasks();
    const tasksWithDocs = allTasks.filter((task) => task.canonicalDocs.length > 0);
    if (tasksWithDocs.length > 0) {
        console.log("\nGenerating canonical (gold-retrieval) contexts...\n");
        const fetched = await docFetcher.fetch(tasksWithDocs, resolvedSource);
        // Persist any metadata the fetch produced.
        if (fetched.metadata) {
            writeMetadataFiles(ROOT, fetched.metadata);
        }
        console.log(`\n Canonical contexts: ${fetched.contexts.length} tasks`);
        for (const entry of fetched.contexts) {
            const tokens = entry.tokenCount ?? 0;
            console.log(` ${entry.taskId}: ${entry.slugs.length} doc(s), ~${tokens} tokens`);
        }
    }
    // Full corpus (opt-in)
    if (opts.includeCorpus) {
        await docFetcher.fetchFullCorpus(resolvedSource);
    }
    console.log("\nDone!");
}
102
+ // ---------------------------------------------------------------------------
103
+ // Helpers
104
+ // ---------------------------------------------------------------------------
105
/**
 * Write the metadata sections that are present to `<rootDir>/contexts/`.
 *
 * Each truthy section of `metadata` is serialized as pretty-printed JSON
 * into its own file and a confirmation line is logged. Absent sections are
 * skipped. The `contexts` directory is created if missing.
 *
 * @param rootDir Directory that contains (or will contain) `contexts/`.
 * @param metadata Object whose `manifest`, `releaseImpact`,
 *   `documentOverlay` and `urlFetch` properties are written when truthy.
 */
function writeMetadataFiles(rootDir, metadata) {
    const contextsDir = join(rootDir, "contexts");
    mkdirSync(contextsDir, { recursive: true });
    // One row per metadata section, in the order the files are written.
    const sections = [
        {
            key: "manifest",
            fileName: "document-manifest.json",
            describe: (value) => ` 📋 Document manifest: ${value.length} docs → contexts/document-manifest.json`,
        },
        {
            key: "releaseImpact",
            fileName: "release-impact.json",
            describe: () => " 📄 Release impact written to contexts/release-impact.json",
        },
        {
            key: "documentOverlay",
            fileName: "document-overlay.json",
            describe: () => " 📄 Document overlay written to contexts/document-overlay.json",
        },
        {
            key: "urlFetch",
            fileName: "url-fetch.json",
            describe: () => " 📄 URL fetch metadata written to contexts/url-fetch.json",
        },
    ];
    for (const { key, fileName, describe } of sections) {
        const payload = metadata[key];
        if (!payload) {
            continue;
        }
        writeFileSync(join(contextsDir, fileName), JSON.stringify(payload, null, 2));
        console.log(describe(payload));
    }
}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * generate-configs command — generate promptfoo config files from models.yaml.
3
+ *
4
+ * Uses the composition root to wire adapters, then calls generateConfigs()
5
+ * directly — the same code path as the pipeline.
6
+ */
7
+ import { Command } from "commander";
8
+ export declare function createGenerateConfigsCommand(): Command;
@@ -0,0 +1,46 @@
1
+ /**
2
+ * generate-configs command — generate promptfoo config files from models.yaml.
3
+ *
4
+ * Uses the composition root to wire adapters, then calls generateConfigs()
5
+ * directly — the same code path as the pipeline.
6
+ */
7
+ import { dirname, resolve } from "path";
8
+ import { fileURLToPath } from "url";
9
+ import { Command } from "commander";
10
+ import { createAppContext } from "../composition-root.js";
11
+ import { generateConfigs } from "../pipeline/generate-configs.js";
12
+ const __dirname = dirname(fileURLToPath(import.meta.url));
13
+ const ROOT = resolve(__dirname, "..", "..");
14
/**
 * Build the `generate-configs` CLI command.
 *
 * The action creates an app context through the composition root (with
 * fetch, eval and every optional stage disabled) and then calls
 * generateConfigs() with the resolved root directory and the requested
 * source.
 *
 * @returns The configured commander Command.
 */
export function createGenerateConfigsCommand() {
    const handleAction = async (opts) => {
        try {
            const appContext = createAppContext({
                rootDir: ROOT,
                mode: "baseline",
                skipFetch: true,
                skipEval: true,
                compareEnabled: false,
                gapAnalysisEnabled: false,
                readinessEnabled: false,
                discoveryReportEnabled: false,
                publishEnabled: false,
                noCache: true,
                noRemoteCache: true,
                searchMode: "open",
                source: opts.source,
            });
            const request = {
                rootDir: appContext.config.rootDir,
                source: opts.source,
            };
            generateConfigs(request);
        }
        catch (failure) {
            // Flag the failure via the exit code; print the message for Errors only.
            process.exitCode = 1;
            if (failure instanceof Error) {
                console.error(failure.message);
            }
        }
    };
    const generate = new Command("generate-configs");
    generate.description("Generate promptfoo config files from config/models.yaml");
    generate.option("-s, --source <name>", "Documentation source name");
    return generate.action(handleAction);
}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * grader subcommand group — tools for measuring grader reliability.
3
+ *
4
+ * Exposes four subcommands:
5
+ * ailf grader consistency — measure grading variance (Phase 1)
6
+ * ailf grader compare — inter-grader comparison (Phase 3)
7
+ * ailf grader sensitivity — discrimination power testing (Phase 4)
8
+ * ailf grader validate — accuracy against human references (Phase 2)
9
+ */
10
+ import { Command } from "commander";
11
+ export declare function createGraderCommand(): Command;
@@ -0,0 +1,118 @@
1
+ /**
2
+ * grader subcommand group — tools for measuring grader reliability.
3
+ *
4
+ * Exposes four subcommands:
5
+ * ailf grader consistency — measure grading variance (Phase 1)
6
+ * ailf grader compare — inter-grader comparison (Phase 3)
7
+ * ailf grader sensitivity — discrimination power testing (Phase 4)
8
+ * ailf grader validate — accuracy against human references (Phase 2)
9
+ */
10
+ import { dirname, join, resolve } from "path";
11
+ import { fileURLToPath } from "url";
12
+ import { Command } from "commander";
13
+ import { runGraderCompare } from "../../pipeline/grader-compare-runner.js";
14
+ import { runGraderConsistency } from "../../pipeline/grader-consistency-runner.js";
15
+ import { runGraderSensitivity } from "../../pipeline/grader-sensitivity-runner.js";
16
+ import { runGraderValidate } from "../../pipeline/grader-validate-runner.js";
17
+ import { collect } from "../shared/options.js";
18
+ const __dirname = dirname(fileURLToPath(import.meta.url));
19
+ const ROOT = resolve(__dirname, "..", "..", "..");
20
/**
 * Build the `grader` command group with its four subcommands:
 * `consistency`, `compare`, `sensitivity` and `validate`.
 *
 * Each subcommand forwards its parsed options to the matching pipeline
 * runner. A thrown error sets `process.exitCode = 1` and, for Error
 * instances, prints the message. `validate` additionally exits with
 * status 1 when the runner reports that the threshold was not met.
 *
 * @returns The configured commander Command.
 */
export function createGraderCommand() {
    const cmd = new Command("grader").description("Grader reliability tools");
    // commander calls option parsers as (value, previous). Passing `parseInt`
    // directly would hand the previous/default value to it as the radix, so
    // with a default of 5 `-r 5` parsed to NaN and `-r 10` parsed to 5.
    // Always parse in base 10 and ignore the second argument.
    const parseBase10 = (value) => parseInt(value, 10);
    // parseFloat ignores extra arguments, but wrap it for symmetry.
    const parseDecimal = (value) => parseFloat(value);
    // Shared failure reporting for every subcommand action.
    const reportFailure = (err) => {
        process.exitCode = 1;
        if (err instanceof Error)
            console.error(err.message);
    };
    // ── consistency ──────────────────────────────────────────────────────
    cmd
        .command("consistency")
        .description("Measure grader consistency by re-grading existing responses N times")
        .option("-r, --replications <n>", "Number of additional grading replications", parseBase10, 5)
        .option("--results <path>", "Path to eval-results.json")
        .action(async (opts) => {
        try {
            await runGraderConsistency({
                replications: opts.replications,
                // Default to the latest results under the package root.
                resultsPath: opts.results ??
                    join(ROOT, "results", "latest", "eval-results.json"),
                rootDir: ROOT,
            });
        }
        catch (err) {
            reportFailure(err);
        }
    });
    // ── compare ──────────────────────────────────────────────────────────
    cmd
        .command("compare")
        .description("Compare multiple grader models on the same responses")
        .option("-c, --candidate <model>", "Candidate grader model ID (repeatable)", collect, [])
        .option("--results <path>", "Path to eval results")
        .option("-f, --format <fmt>", "Output format: table or json", "table")
        .option("-o, --output <path>", "Write JSON report to file")
        .action(async (opts) => {
        try {
            // The label is the text after the last ":" in the model ID.
            const candidates = opts.candidate.map((id) => ({
                id,
                label: id.split(":").pop() ?? id,
            }));
            await runGraderCompare({
                candidates,
                format: opts.format,
                outputPath: opts.output,
                resultsPath: opts.results,
                rootDir: ROOT,
            });
        }
        catch (err) {
            reportFailure(err);
        }
    });
    // ── sensitivity ──────────────────────────────────────────────────────
    cmd
        .command("sensitivity")
        .description("Test grader discrimination power using programmatic code degradation")
        .option("-a, --area <name>", "Test only reference solutions in this area")
        .option("-f, --format <fmt>", "Output format: table or json", "table")
        .option("-o, --output <path>", "Write JSON report to file")
        .action(async (opts) => {
        try {
            await runGraderSensitivity({
                areaFilter: opts.area,
                format: opts.format,
                outputPath: opts.output,
                rootDir: ROOT,
            });
        }
        catch (err) {
            reportFailure(err);
        }
    });
    // ── validate ─────────────────────────────────────────────────────────
    cmd
        .command("validate")
        .description("Validate grader accuracy against human reference grades")
        .option("-g, --grader <model>", "Grader model to validate")
        .option("-t, --threshold <n>", "MAE threshold for pass/fail", parseDecimal, 10)
        .action(async (opts) => {
        try {
            const result = await runGraderValidate({
                graderModel: opts.grader,
                maeThreshold: opts.threshold,
                rootDir: ROOT,
            });
            if (!result.passesThreshold) {
                console.error(`\n ❌ VALIDATION FAILED: MAE ${result.overallMae} exceeds threshold ${opts.threshold}`);
                process.exit(1);
            }
        }
        catch (err) {
            reportFailure(err);
        }
    });
    return cmd;
}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * commands/init.ts — Initialize a directory for AI Literacy Framework.
3
+ *
4
+ * Creates the .ailf/ directory structure with example configuration and
5
+ * task files. The generated files are ready-to-edit starting points —
6
+ * not live evaluation tasks.
7
+ *
8
+ * YAML output (default) preserves the inline comments from the source
9
+ * YAML files in packages/core/examples/. JSON output is a plain
10
+ * serialization of the parsed data — no comments.
11
+ *
12
+ * Usage:
13
+ * ailf init # YAML output (default)
14
+ * ailf init --output-format json # JSON output
15
+ * ailf init --force # overwrite existing files
16
+ * ailf init --path ./my-dir # target a specific directory
17
+ */
18
+ import { Command } from "commander";
19
+ export declare function createInitCommand(): Command;
@@ -0,0 +1,150 @@
1
+ /**
2
+ * commands/init.ts — Initialize a directory for AI Literacy Framework.
3
+ *
4
+ * Creates the .ailf/ directory structure with example configuration and
5
+ * task files. The generated files are ready-to-edit starting points —
6
+ * not live evaluation tasks.
7
+ *
8
+ * YAML output (default) preserves the inline comments from the source
9
+ * YAML files in packages/core/examples/. JSON output is a plain
10
+ * serialization of the parsed data — no comments.
11
+ *
12
+ * Usage:
13
+ * ailf init # YAML output (default)
14
+ * ailf init --output-format json # JSON output
15
+ * ailf init --force # overwrite existing files
16
+ * ailf init --path ./my-dir # target a specific directory
17
+ */
18
+ import { Command } from "commander";
19
+ import { existsSync, mkdirSync, writeFileSync } from "fs";
20
+ import { resolve, relative } from "path";
21
+ import { ailfConfigData, ailfConfigYaml, taskYamlFiles, TASK_FILE_NAMES, allTaskData, } from "../_vendor/ailf-core/index.js";
22
+ // ---------------------------------------------------------------------------
23
+ // Command factory
24
+ // ---------------------------------------------------------------------------
25
/**
 * Build the `init` CLI command.
 *
 * Registers `--output-format` (default "yaml"), `--force` (default false)
 * and `--path` (default "."), and delegates the work to runInit().
 *
 * @returns The configured commander Command.
 */
export function createInitCommand() {
    const init = new Command("init");
    init.description("Initialize a directory for AI Literacy Framework evaluation");
    init.option("--output-format <fmt>", 'Output format for generated files: "yaml" (default) or "json"', "yaml");
    init.option("--force", "Overwrite existing files", false);
    init.option("--path <dir>", "Target directory (default: current directory)", ".");
    init.action(async (opts) => {
        await runInit(opts);
    });
    return init;
}
35
+ // ---------------------------------------------------------------------------
36
+ // Helpers
37
+ // ---------------------------------------------------------------------------
38
/**
 * Write `content` to `filePath` as UTF-8 unless the file already exists.
 * An existing file is only overwritten when `force` is truthy.
 *
 * @param filePath Destination path.
 * @param content Text to write.
 * @param force Overwrite an existing file when truthy.
 * @returns true when the file was written, false when it was left alone.
 */
function writeIfNew(filePath, content, force) {
    const alreadyPresent = existsSync(filePath);
    const shouldWrite = !alreadyPresent || Boolean(force);
    if (shouldWrite) {
        writeFileSync(filePath, content, "utf-8");
    }
    return shouldWrite;
}
49
/**
 * Relative path from `from` to `to` for display, always carrying an explicit
 * leading `./` or `../`.
 *
 * A path that climbs out of `from` (`..`, `../x`) is returned unchanged.
 * Anything else gets a `./` prefix, including dot-named entries such as
 * `.ailf/config.yaml`. A plain `startsWith(".")` check would mistake those
 * for already-prefixed paths and leave the prefix off.
 *
 * @param from Base directory.
 * @param to Target path.
 * @returns The display path, e.g. `./.ailf/tasks` or `../sibling`.
 */
function rel(from, to) {
    const r = relative(from, to);
    // Match only a real "./" or "../" prefix (either separator) or a bare "..".
    const hasExplicitPrefix = r === ".." || /^\.\.?[\\/]/.test(r);
    return hasExplicitPrefix ? r : `./${r}`;
}
54
+ // ---------------------------------------------------------------------------
55
+ // Init logic
56
+ // ---------------------------------------------------------------------------
57
/**
 * Scaffold `.ailf/` inside the target directory.
 *
 * Creates `.ailf/tasks/`, then writes the example config, the example
 * tasks and a `.gitignore`, skipping files that already exist unless
 * `opts.force` is set. Finishes by printing what was created or skipped
 * and the suggested next steps.
 *
 * @param opts Parsed CLI options; reads `outputFormat`, `force` and `path`.
 */
async function runInit(opts) {
    // Anything other than "json" falls back to YAML.
    const useJson = opts.outputFormat === "json";
    const ext = useJson ? ".json" : ".yaml";
    const force = opts.force;
    // Resolve the target against AILF_CALLER_CWD when set, else the process cwd.
    const baseDir = process.env.AILF_CALLER_CWD ?? process.cwd();
    const targetDir = resolve(baseDir, opts.path);
    const ailfDir = resolve(targetDir, ".ailf");
    const tasksDir = resolve(ailfDir, "tasks");
    console.log();
    console.log(" 🚀 Initializing AI Literacy Framework");
    console.log();
    // 1. Create directories (tasksDir is nested, so this creates .ailf/ too)
    mkdirSync(tasksDir, { recursive: true });
    console.log(` ✓ Created ${rel(targetDir, ailfDir)}/`);
    console.log(` ✓ Created ${rel(targetDir, tasksDir)}/`);
    const written = [];
    const skipped = [];
    // Write one file and record its display path under written or skipped.
    const emit = (filePath, content) => {
        const bucket = writeIfNew(filePath, content, force) ? written : skipped;
        bucket.push(rel(targetDir, filePath));
    };
    // 2. Config file: raw YAML passthrough (keeps comments) or serialized JSON
    const configPath = resolve(ailfDir, `config${ext}`);
    emit(configPath, useJson ? JSON.stringify(ailfConfigData, null, 2) + "\n" : ailfConfigYaml);
    // 3. Example tasks in .ailf/tasks/
    if (useJson) {
        // One JSON file per task, each holding a single-element array.
        const taskList = Array.isArray(allTaskData) ? allTaskData : [allTaskData];
        for (const task of taskList) {
            emit(resolve(tasksDir, `${task.id}.json`), JSON.stringify([task], null, 2) + "\n");
        }
    }
    else {
        // Each task ships as its own commented YAML file; write it as-is.
        for (const stem of TASK_FILE_NAMES) {
            emit(resolve(tasksDir, `${stem}.yaml`), taskYamlFiles[stem]);
        }
    }
    // 4. .gitignore inside .ailf/ keeps generated output out of version control
    emit(resolve(ailfDir, ".gitignore"), `# AILF generated files\nresults/\ncontexts/\n`);
    // 5. Summary
    console.log();
    written.forEach((file) => {
        console.log(` ✓ Created ${file}`);
    });
    if (skipped.length > 0) {
        console.log();
        skipped.forEach((file) => {
            console.log(` ⊘ Skipped ${file} (already exists, use --force to overwrite)`);
        });
    }
    console.log();
    console.log(" Next steps:");
    console.log();
    console.log(` 1. Edit ${rel(targetDir, configPath)} with your Sanity project settings`);
    console.log(` 2. Customize the example tasks in ${rel(targetDir, tasksDir)}/`);
    console.log(" 3. Run: ailf pipeline --repo-tasks-path .ailf/tasks/");
    console.log();
}
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Interactive mode — guided wizard for the evaluation pipeline.
3
+ *
4
+ * When `ailf` is run with no arguments (or `ailf interactive`), this module
5
+ * prompts the user through mode selection, area scoping, debug options,
6
+ * and common flags — then builds and executes the equivalent `ailf pipeline`
7
+ * command.
8
+ *
9
+ * Uses @inquirer/prompts for a clean, modern terminal UI.
10
+ */
11
+ import { Command } from "commander";
12
+ export declare function createInteractiveCommand(): Command;