@sanity/ailf 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (442) hide show
  1. package/canonical/grader-references/README.md +2 -2
  2. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  3. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  4. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  5. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  6. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  7. package/config/features.ts +1 -1
  8. package/config/models.ts +28 -23
  9. package/config/sources.ts +1 -1
  10. package/config/thresholds.ts +1 -1
  11. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  13. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  17. package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
  18. package/dist/_vendor/ailf-core/config-helpers.js +29 -0
  19. package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
  20. package/dist/_vendor/ailf-core/examples/index.js +208 -114
  21. package/dist/_vendor/ailf-core/index.d.ts +1 -0
  22. package/dist/_vendor/ailf-core/index.js +1 -0
  23. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  25. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  27. package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
  28. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  29. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  30. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  31. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  32. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  33. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
  34. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
  35. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  36. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  37. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  38. package/dist/_vendor/ailf-core/services/index.js +1 -1
  39. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  40. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
  41. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  42. package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
  43. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
  44. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  45. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  46. package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
  47. package/dist/_vendor/ailf-tasks/cli.js +61 -0
  48. package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
  49. package/dist/_vendor/ailf-tasks/index.js +16 -0
  50. package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
  51. package/dist/_vendor/ailf-tasks/parser.js +73 -0
  52. package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
  53. package/dist/_vendor/ailf-tasks/schemas.js +180 -0
  54. package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
  55. package/dist/_vendor/ailf-tasks/validation.js +162 -0
  56. package/dist/adapters/api-client/remediation.js +2 -2
  57. package/dist/adapters/config-sources/file-config-adapter.js +6 -1
  58. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  59. package/dist/adapters/index.d.ts +0 -1
  60. package/dist/adapters/index.js +0 -1
  61. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  62. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  63. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  64. package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
  65. package/dist/adapters/task-sources/index.d.ts +1 -2
  66. package/dist/adapters/task-sources/index.js +1 -2
  67. package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
  68. package/dist/adapters/task-sources/repo-schemas.js +2 -2
  69. package/dist/adapters/task-sources/repo-task-source.js +1 -1
  70. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  71. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
  73. package/dist/adapters/task-sources/task-file-loader.js +20 -6
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/explain-handler.d.ts +1 -1
  95. package/dist/commands/explain-handler.js +37 -8
  96. package/dist/commands/fetch-docs.js +1 -0
  97. package/dist/commands/generate-configs.d.ts +3 -3
  98. package/dist/commands/generate-configs.js +20 -8
  99. package/dist/commands/init.d.ts +2 -3
  100. package/dist/commands/init.js +56 -170
  101. package/dist/commands/pipeline-action.d.ts +7 -1
  102. package/dist/commands/pipeline-action.js +43 -19
  103. package/dist/commands/pipeline.d.ts +6 -1
  104. package/dist/commands/pipeline.js +7 -2
  105. package/dist/commands/pr-comment.js +1 -0
  106. package/dist/commands/publish.js +1 -0
  107. package/dist/commands/shared/help.js +2 -2
  108. package/dist/commands/update-quality-scores.d.ts +5 -0
  109. package/dist/commands/update-quality-scores.js +20 -0
  110. package/dist/composition-root.d.ts +2 -3
  111. package/dist/composition-root.js +27 -14
  112. package/dist/config/features.ts +23 -0
  113. package/dist/config/models.ts +100 -0
  114. package/dist/config/prompts.ts +16 -0
  115. package/dist/config/rubrics.ts +225 -0
  116. package/dist/config/schedules.ts +47 -0
  117. package/dist/config/sinks.ts +37 -0
  118. package/dist/config/sources.ts +21 -0
  119. package/dist/config/thresholds.ts +61 -0
  120. package/dist/lib/agent-behavior-report.d.ts +8 -0
  121. package/dist/lib/agent-behavior-report.js +185 -0
  122. package/dist/lib/baseline.d.ts +19 -0
  123. package/dist/lib/baseline.js +153 -0
  124. package/dist/lib/calculate-scores.d.ts +23 -0
  125. package/dist/lib/calculate-scores.js +42 -0
  126. package/dist/lib/compare.d.ts +18 -0
  127. package/dist/lib/compare.js +170 -0
  128. package/dist/lib/coverage-audit.d.ts +4 -0
  129. package/dist/lib/coverage-audit.js +42 -0
  130. package/dist/lib/discovery-report.d.ts +13 -0
  131. package/dist/lib/discovery-report.js +57 -0
  132. package/dist/lib/fetch-docs.d.ts +30 -0
  133. package/dist/lib/fetch-docs.js +171 -0
  134. package/dist/lib/generate-configs.d.ts +25 -0
  135. package/dist/lib/generate-configs.js +42 -0
  136. package/dist/lib/grader-api.d.ts +21 -0
  137. package/dist/lib/grader-api.js +34 -0
  138. package/dist/lib/grader-compare.d.ts +19 -0
  139. package/dist/lib/grader-compare.js +91 -0
  140. package/dist/lib/grader-consistency.d.ts +27 -0
  141. package/dist/lib/grader-consistency.js +79 -0
  142. package/dist/lib/grader-sensitivity.d.ts +19 -0
  143. package/dist/lib/grader-sensitivity.js +75 -0
  144. package/dist/lib/grader-validate.d.ts +19 -0
  145. package/dist/lib/grader-validate.js +78 -0
  146. package/dist/lib/measure-retrieval.d.ts +14 -0
  147. package/dist/lib/measure-retrieval.js +71 -0
  148. package/dist/lib/pr-comment.d.ts +16 -0
  149. package/dist/lib/pr-comment.js +28 -0
  150. package/dist/lib/readiness-report.d.ts +13 -0
  151. package/dist/lib/readiness-report.js +108 -0
  152. package/dist/lib/webhook-server.d.ts +11 -0
  153. package/dist/lib/webhook-server.js +24 -0
  154. package/dist/lib/weekly-digest.d.ts +24 -0
  155. package/dist/lib/weekly-digest.js +148 -0
  156. package/dist/orchestration/build-app-context.js +13 -0
  157. package/dist/orchestration/cache-context.d.ts +23 -0
  158. package/dist/orchestration/cache-context.js +43 -0
  159. package/dist/orchestration/env-bridge.d.ts +21 -0
  160. package/dist/orchestration/env-bridge.js +66 -0
  161. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  162. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  163. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  164. package/dist/orchestration/step-runner.js +5 -1
  165. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  166. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  167. package/dist/orchestration/steps/callback-step.js +10 -1
  168. package/dist/orchestration/steps/compare-step.js +6 -3
  169. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  170. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  171. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  172. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  173. package/dist/orchestration/steps/fetch-docs-step.js +30 -16
  174. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  175. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  176. package/dist/orchestration/steps/generate-configs-step.js +50 -15
  177. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  178. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  179. package/dist/orchestration/steps/publish-report-step.js +19 -0
  180. package/dist/orchestration/steps/readiness-step.js +8 -3
  181. package/dist/orchestration/steps/report-step.js +17 -4
  182. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  183. package/dist/orchestration/steps/run-eval-step.js +51 -31
  184. package/dist/pipeline/agent-behavior-report.js +6 -0
  185. package/dist/pipeline/attribution.d.ts +1 -1
  186. package/dist/pipeline/attribution.js +1 -1
  187. package/dist/pipeline/cache.js +29 -15
  188. package/dist/pipeline/calculate-scores.d.ts +2 -0
  189. package/dist/pipeline/calculate-scores.js +70 -33
  190. package/dist/pipeline/chronic-failures.d.ts +55 -0
  191. package/dist/pipeline/chronic-failures.js +110 -0
  192. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
  193. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  194. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  195. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  196. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  197. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  198. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  199. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  200. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  201. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  202. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  203. package/dist/pipeline/compiler/config-loader.js +42 -2
  204. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  205. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  206. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  207. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  208. package/dist/pipeline/compiler/index.d.ts +2 -5
  209. package/dist/pipeline/compiler/index.js +2 -5
  210. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  211. package/dist/pipeline/compiler/literacy-bridge.js +1 -1
  212. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
  213. package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
  214. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
  215. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
  216. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
  217. package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
  218. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
  219. package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
  220. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
  221. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
  222. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
  223. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
  224. package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
  225. package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
  226. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
  227. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
  228. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
  229. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
  230. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
  231. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
  232. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
  233. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
  234. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
  235. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
  237. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
  241. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
  242. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
  244. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  250. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
  251. package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
  252. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  253. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  254. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  255. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  256. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  257. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  258. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  259. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  260. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  261. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  262. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  263. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  264. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  265. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  266. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  267. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  268. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  269. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  270. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  271. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  272. package/dist/pipeline/compiler/task-bridge.js +92 -0
  273. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  274. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  275. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  276. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  277. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  278. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  279. package/dist/pipeline/coverage-audit.d.ts +1 -1
  280. package/dist/pipeline/coverage-audit.js +1 -1
  281. package/dist/pipeline/degradations.d.ts +1 -1
  282. package/dist/pipeline/degradations.js +1 -1
  283. package/dist/pipeline/failure-modes.d.ts +1 -1
  284. package/dist/pipeline/failure-modes.js +13 -1
  285. package/dist/pipeline/gap-analysis.d.ts +1 -1
  286. package/dist/pipeline/gap-analysis.js +3 -1
  287. package/dist/pipeline/generate-configs.d.ts +2 -2
  288. package/dist/pipeline/generate-configs.js +15 -8
  289. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  290. package/dist/pipeline/grader-compare-runner.js +7 -1
  291. package/dist/pipeline/grader-comparison.d.ts +1 -1
  292. package/dist/pipeline/grader-comparison.js +1 -1
  293. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  294. package/dist/pipeline/grader-consistency-runner.js +7 -1
  295. package/dist/pipeline/grader-consistency.d.ts +1 -1
  296. package/dist/pipeline/grader-consistency.js +1 -1
  297. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  298. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  299. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  300. package/dist/pipeline/grader-sensitivity.js +1 -1
  301. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  302. package/dist/pipeline/grader-validate-runner.js +2 -2
  303. package/dist/pipeline/grader-validation.d.ts +1 -1
  304. package/dist/pipeline/grader-validation.js +1 -1
  305. package/dist/pipeline/map-request-to-config.js +15 -2
  306. package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
  307. package/dist/pipeline/mirror-repo-tasks.js +1 -1
  308. package/dist/pipeline/plan-format.d.ts +1 -1
  309. package/dist/pipeline/plan-format.js +1 -1
  310. package/dist/pipeline/plan.d.ts +1 -1
  311. package/dist/pipeline/plan.js +67 -29
  312. package/dist/pipeline/probe.d.ts +1 -1
  313. package/dist/pipeline/probe.js +1 -1
  314. package/dist/pipeline/readiness-report.d.ts +2 -2
  315. package/dist/pipeline/readiness-report.js +2 -2
  316. package/dist/pipeline/release-classification.d.ts +1 -1
  317. package/dist/pipeline/release-classification.js +1 -1
  318. package/dist/pipeline/release-report.d.ts +1 -1
  319. package/dist/pipeline/release-report.js +1 -1
  320. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  321. package/dist/pipeline/repo-eval-comment.js +1 -1
  322. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  323. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  324. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  325. package/dist/pipeline/resolve-mappings.js +44 -44
  326. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  327. package/dist/pipeline/retrieval-metrics.js +28 -20
  328. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  329. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  330. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  331. package/dist/pipeline/steps/compare-step.js +90 -0
  332. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  333. package/dist/pipeline/steps/eval-step.js +347 -0
  334. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  335. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  336. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  337. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  338. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  339. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  340. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  341. package/dist/pipeline/steps/publish-report-step.js +243 -0
  342. package/dist/pipeline/steps/report-step.d.ts +13 -0
  343. package/dist/pipeline/steps/report-step.js +56 -0
  344. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  345. package/dist/pipeline/steps/update-scores-step.js +42 -0
  346. package/dist/pipeline/targeted-loo.d.ts +1 -1
  347. package/dist/pipeline/targeted-loo.js +1 -1
  348. package/dist/pipeline/thresholds.d.ts +1 -1
  349. package/dist/pipeline/thresholds.js +1 -1
  350. package/dist/pipeline/validate.js +13 -0
  351. package/dist/report-store.d.ts +17 -0
  352. package/dist/report-store.js +24 -0
  353. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  354. package/dist/scripts/agent-behavior-report.js +315 -0
  355. package/dist/scripts/baseline.d.ts +43 -0
  356. package/dist/scripts/baseline.js +267 -0
  357. package/dist/scripts/calculate-scores.d.ts +166 -0
  358. package/dist/scripts/calculate-scores.js +1296 -0
  359. package/dist/scripts/compare.d.ts +22 -0
  360. package/dist/scripts/compare.js +334 -0
  361. package/dist/scripts/coverage-audit.d.ts +44 -0
  362. package/dist/scripts/coverage-audit.js +209 -0
  363. package/dist/scripts/debug-eval.d.ts +19 -0
  364. package/dist/scripts/debug-eval.js +73 -0
  365. package/dist/scripts/discovery-report.d.ts +58 -0
  366. package/dist/scripts/discovery-report.js +250 -0
  367. package/dist/scripts/fetch-docs.d.ts +35 -0
  368. package/dist/scripts/fetch-docs.js +472 -0
  369. package/dist/scripts/generate-configs.d.ts +66 -0
  370. package/dist/scripts/generate-configs.js +459 -0
  371. package/dist/scripts/grader-api.d.ts +27 -0
  372. package/dist/scripts/grader-api.js +206 -0
  373. package/dist/scripts/grader-compare.d.ts +22 -0
  374. package/dist/scripts/grader-compare.js +368 -0
  375. package/dist/scripts/grader-consistency.d.ts +20 -0
  376. package/dist/scripts/grader-consistency.js +313 -0
  377. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  378. package/dist/scripts/grader-sensitivity.js +354 -0
  379. package/dist/scripts/grader-validate.d.ts +19 -0
  380. package/dist/scripts/grader-validate.js +267 -0
  381. package/dist/scripts/measure-retrieval.d.ts +10 -0
  382. package/dist/scripts/measure-retrieval.js +145 -0
  383. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  384. package/dist/scripts/migrate-task-mode.js +1 -1
  385. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  386. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  387. package/dist/scripts/pipeline.d.ts +76 -0
  388. package/dist/scripts/pipeline.js +1031 -0
  389. package/dist/scripts/pr-comment.d.ts +10 -0
  390. package/dist/scripts/pr-comment.js +510 -0
  391. package/dist/scripts/readiness-report.d.ts +88 -0
  392. package/dist/scripts/readiness-report.js +342 -0
  393. package/dist/scripts/update-quality-scores.d.ts +15 -0
  394. package/dist/scripts/update-quality-scores.js +184 -0
  395. package/dist/scripts/validate-task-sources.d.ts +1 -1
  396. package/dist/scripts/validate-task-sources.js +1 -1
  397. package/dist/scripts/validate.d.ts +13 -0
  398. package/dist/scripts/validate.js +79 -0
  399. package/dist/scripts/webhook-server.d.ts +26 -0
  400. package/dist/scripts/webhook-server.js +147 -0
  401. package/dist/scripts/weekly-digest.d.ts +24 -0
  402. package/dist/scripts/weekly-digest.js +144 -0
  403. package/dist/sinks/format-slack.d.ts +64 -0
  404. package/dist/sinks/format-slack.js +306 -0
  405. package/dist/sinks/slack-sink.d.ts +27 -0
  406. package/dist/sinks/slack-sink.js +78 -0
  407. package/dist/sinks/types.d.ts +1 -1
  408. package/dist/sinks/types.js +1 -1
  409. package/dist/sinks/webhook-sink.d.ts +19 -0
  410. package/dist/sinks/webhook-sink.js +50 -0
  411. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  412. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  413. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  414. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  415. package/dist/tasks/literacy/functions.task.ts +70 -0
  416. package/dist/tasks/literacy/groq.task.ts +259 -0
  417. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  418. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  419. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  420. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  421. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  422. package/package.json +24 -24
  423. package/tasks/.expanded.agentic.yaml +280 -0
  424. package/tasks/.expanded.yaml +565 -0
  425. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  426. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  427. package/tasks/literacy/content-lake.task.ts +181 -0
  428. package/tasks/literacy/frameworks.task.ts +1 -0
  429. package/tasks/literacy/functions.task.ts +1 -0
  430. package/tasks/literacy/groq.task.ts +1 -0
  431. package/tasks/literacy/image-handling.task.ts +95 -0
  432. package/tasks/literacy/nextjs-live.task.ts +2 -1
  433. package/tasks/literacy/portable-text.task.ts +169 -0
  434. package/tasks/literacy/studio-setup.task.ts +5 -2
  435. package/tasks/literacy/visual-editing.task.ts +1 -0
  436. package/LICENSE +0 -21
  437. package/tasks/frameworks.yaml +0 -98
  438. package/tasks/functions.yaml +0 -51
  439. package/tasks/groq.yaml +0 -216
  440. package/tasks/nextjs-live.yaml +0 -62
  441. package/tasks/studio-setup.yaml +0 -111
  442. package/tasks/visual-editing.yaml +0 -120
@@ -0,0 +1,57 @@
1
+ /**
2
+ * lib/discovery-report.ts — DEPRECATED re-export shim.
3
+ *
4
+ * The real implementation has moved to pipeline/discovery-report.ts.
5
+ *
6
+ * @deprecated Import from ../pipeline/discovery-report.js instead.
7
+ */
8
+ import { existsSync, readFileSync, writeFileSync } from "node:fs";
9
+ import { dirname, join, resolve } from "node:path";
10
+ import { fileURLToPath } from "node:url";
11
+ export { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../pipeline/discovery-report.js";
12
+ import { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../pipeline/discovery-report.js";
13
+ const __dirname = dirname(fileURLToPath(import.meta.url));
14
+ const ROOT = resolve(__dirname, "..", "..");
15
/**
 * Legacy CLI entry point for the discovery report.
 *
 * Reads a score-summary JSON file, builds the report with
 * generateDiscoveryReport(), renders it with formatDiscoveryMarkdown(), and
 * either writes the Markdown to a file or prints it to stdout.
 *
 * Arguments are read from process.argv.slice(2):
 *   --output <path>    write the Markdown to <path> instead of stdout
 *   --area <a,b,...>   comma-separated area filter; may be given more than once
 *   --input <path>     score summary file to read
 *   <path>             any argument not starting with "-" also sets the
 *                      summary path
 * A flag without a following value, and any unrecognized flag, is ignored.
 *
 * When no input is given the summary is read from
 * <ROOT>/results/latest/score-summary.json. If the summary file does not
 * exist, an error is printed and the process exits with code 1.
 *
 * @deprecated Use generateDiscoveryReport() + formatDiscoveryMarkdown() directly.
 */
export function main() {
    const args = process.argv.slice(2);
    let output;
    const areaFilter = [];
    let summaryPath = join(ROOT, "results", "latest", "score-summary.json");
    for (let i = 0; i < args.length; i++) {
        const arg = args[i];
        const value = args[i + 1];
        if (arg === "--output" && value) {
            output = value;
            i++;
        } else if (arg === "--area" && value) {
            // Accept "a, b" and stray commas: surrounding whitespace is trimmed
            // and empty names are dropped so they cannot end up in the filter.
            const areas = value
                .split(",")
                .map((area) => area.trim())
                .filter((area) => area.length > 0);
            areaFilter.push(...areas);
            i++;
        } else if (arg === "--input" && value) {
            summaryPath = value;
            i++;
        } else if (!arg.startsWith("-")) {
            // Bare positional argument: treated as the summary path.
            summaryPath = arg;
        }
    }
    if (!existsSync(summaryPath)) {
        console.error(`❌ Score summary not found: ${summaryPath}`);
        console.error("Run an agentic evaluation first: pnpm pipeline -- --mode agentic");
        process.exit(1);
    }
    const summary = JSON.parse(readFileSync(summaryPath, "utf-8"));
    // An empty filter means "no filtering", signalled to the generator as undefined.
    const report = generateDiscoveryReport(summary, areaFilter.length > 0 ? areaFilter : undefined);
    const markdown = formatDiscoveryMarkdown(report);
    if (output) {
        writeFileSync(output, markdown, "utf-8");
        console.log(`✅ Discovery report written to ${output}`);
    } else {
        console.log(markdown);
    }
}
54
// Invoke main() only when the script path in process.argv[1] ends with this
// module's file name, in either its .ts or .js form.
if (["discovery-report.ts", "discovery-report.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main();
}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * fetch-docs.ts — CLI entry point for documentation fetching.
3
+ *
4
+ * This is a standalone script for direct CLI invocation:
5
+ * npx tsx src/lib/fetch-docs.ts [--source name] [--include-feature-areas] [--include-corpus]
6
+ *
7
+ * The pipeline uses ctx.docFetcher (SanityDocFetcher adapter) directly via
8
+ * FetchDocsStep — this file is NOT called by the pipeline.
9
+ *
10
+ * Capabilities:
11
+ * 1. Canonical contexts — delegates to SanityDocFetcher adapter (always)
12
+ * 2. Feature-area contexts — one file per GROQ feature area query
13
+ * (opt-in via --include-feature-areas)
14
+ * 3. Full corpus — all articles in one file
15
+ * (opt-in via --include-corpus)
16
+ */
17
+ import "dotenv/config";
18
+ import { type ResolvedSourceConfig } from "../sources.js";
19
+ /** Options for the fetch-docs main() function; every field is optional. */
20
+ export interface FetchDocsOptions {
21
+ /** When true, also fetch the full corpus (intended for retrieval experiments). */
22
+ includeCorpus?: boolean;
23
+ /** When true, fetch feature-area contexts in addition to the canonical contexts. */
24
+ includeFeatureAreas?: boolean;
25
+ /** Pre-resolved source config; when provided, the loadSource() call is skipped. */
26
+ resolvedSource?: ResolvedSourceConfig;
27
+ /** Name of the documentation source to use (e.g., "branch", "local"). */
28
+ source?: string;
29
+ }
30
+ export declare function main(options?: FetchDocsOptions): Promise<void>;
@@ -0,0 +1,171 @@
1
+ /**
2
+ * Fetch-docs.ts — CLI entry point for documentation fetching.
3
+ *
4
+ * This is a standalone script for direct CLI invocation:
5
+ * npx tsx src/lib/fetch-docs.ts [--source name] [--include-feature-areas] [--include-corpus]
6
+ *
7
+ * The pipeline uses ctx.docFetcher (SanityDocFetcher adapter) directly via
8
+ * FetchDocsStep — this file is NOT called by the pipeline.
9
+ *
10
+ * Capabilities:
11
+ * 1. Canonical contexts — delegates to SanityDocFetcher adapter (always)
12
+ * 2. Feature-area contexts — one file per GROQ feature area query
13
+ * (opt-in via --include-feature-areas)
14
+ * 3. Full corpus — all articles in one file
15
+ * (opt-in via --include-corpus)
16
+ */
17
// oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
import "dotenv/config";
import { mkdirSync, writeFileSync } from "fs";
import { dirname, join } from "path";
import { fileURLToPath } from "url";
import { SanityDocFetcher } from "../adapters/doc-fetchers/sanity-doc-fetcher.js";
import { YamlTaskSource } from "../adapters/task-sources/index.js";
import { getSanityClient } from "../sanity/client.js";
import { toMarkdown } from "../sanity/portable-text.js";
import { ALL_ARTICLES_QUERY, ALL_FEATURE_AREAS, FEATURE_AREA_QUERIES, } from "../sanity/queries.js";
27
+ // ---------------------------------------------------------------------------
28
+ // Helpers
29
+ // ---------------------------------------------------------------------------
30
// Directory two levels above this module; all output paths are built from it.
// fileURLToPath() replaces URL#pathname because pathname stays percent-encoded
// (a space becomes "%20") and keeps a leading slash before Windows drive
// letters, both of which produce a directory that does not exist on disk.
const ROOT = join(dirname(fileURLToPath(import.meta.url)), "..", "..");
31
/**
 * Rewrite every "{{" and "}}" in the text as a Nunjucks expression that
 * outputs the same two characters literally.
 *
 * @param {string} text - Raw text that may contain double-brace delimiters.
 * @returns {string} Text with each delimiter pair wrapped as a quoted literal.
 */
function escapeNunjucks(text) {
    const literalFor = {
        "{{": '{{ "{{" }}',
        "}}": '{{ "}}" }}',
    };
    return text.replace(/\{\{|\}\}/g, (delimiter) => literalFor[delimiter]);
}
34
/**
 * Rough size estimate: one token per four characters, rounded up.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count.
 */
function estimateTokens(text) {
    const CHARS_PER_TOKEN = 4;
    const approximate = text.length / CHARS_PER_TOKEN;
    return Math.ceil(approximate);
}
37
/**
 * Render one article as a markdown section: a level-2 heading with the
 * title, an optional "Section:" line, an optional description paragraph,
 * then the article content converted by toMarkdown().
 *
 * @param {object} doc - Article with `title` and optional `section`, `description`, `content`.
 * @returns {string} Markdown for the article.
 */
function formatArticle(doc) {
    const heading = `## ${doc.title}\n\n`;
    let preamble = "";
    if (doc.section) {
        preamble += `Section: ${doc.section.title}\n`;
    }
    if (doc.description) {
        preamble += `${doc.description}\n\n`;
    }
    // Missing content is rendered as an empty block list.
    const body = toMarkdown(doc.content ?? []);
    return `${heading}${preamble}${body}`;
}
43
+ // ---------------------------------------------------------------------------
44
+ // Feature-area contexts (opt-in, not part of the port)
45
+ // ---------------------------------------------------------------------------
46
/**
 * Write one markdown context file per feature area into <ROOT>/contexts.
 *
 * Each area's query is run in turn; areas with no articles are reported
 * with a warning and produce no file.
 *
 * @param {object} source - Resolved source config passed to getSanityClient().
 * @returns {Promise<void>}
 */
async function generateFeatureAreaContexts(source) {
    const sanity = getSanityClient(undefined, source);
    const targetDir = join(ROOT, "contexts");
    mkdirSync(targetDir, { recursive: true });
    console.log("Generating feature-area contexts...\n");
    for (const area of ALL_FEATURE_AREAS) {
        const articles = await sanity.fetch(FEATURE_AREA_QUERIES[area]);
        if (articles.length !== 0) {
            const body = articles.map(formatArticle).join("\n\n---\n\n");
            // Escape Nunjucks delimiters in the written file; the token estimate uses the raw text.
            writeFileSync(join(targetDir, `${area}.md`), escapeNunjucks(body));
            console.log(` ${area}: ${articles.length} articles, ~${estimateTokens(body)} tokens`);
        }
        else {
            console.warn(` [warn] No articles found for "${area}"`);
        }
    }
}
64
+ // ---------------------------------------------------------------------------
65
+ // Full corpus (opt-in, not part of the port)
66
+ // ---------------------------------------------------------------------------
67
/**
 * Fetch every article and write them as a single markdown file,
 * <ROOT>/contexts/full-corpus.md, with articles separated by "---" rules.
 *
 * @param {object} source - Resolved source config; `baseUrl` is used to build each article URL.
 * @returns {Promise<void>}
 */
async function generateFullCorpus(source) {
    const sanity = getSanityClient(undefined, source);
    console.log("\nGenerating full corpus...");
    const articles = await sanity.fetch(ALL_ARTICLES_QUERY);
    const sections = [];
    for (const article of articles) {
        // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string title should fall back to "General"
        const sectionTitle = article.section?.title || "General";
        const body = toMarkdown(article.content ?? []);
        sections.push(`## ${article.title}\n\nSection: ${sectionTitle}\nURL: ${source.baseUrl}/${article.slug}\n\n${body}`);
    }
    const corpus = sections.join("\n\n---\n\n");
    const targetDir = join(ROOT, "contexts");
    mkdirSync(targetDir, { recursive: true });
    // Escape Nunjucks delimiters in the written file; the token estimate uses the raw text.
    writeFileSync(join(targetDir, "full-corpus.md"), escapeNunjucks(corpus));
    console.log(` full-corpus.md: ${articles.length} articles, ~${estimateTokens(corpus)} tokens`);
}
87
/**
 * CLI entry point for documentation fetching.
 *
 * Resolves the documentation source, prints a summary of it, then writes
 * (in this order) optional feature-area contexts, canonical contexts with
 * their metadata files, and the optional full corpus.
 *
 * @param {object} [options] - Overrides for the CLI arguments.
 * @param {boolean} [options.includeCorpus] - Overrides the --include-corpus flag.
 * @param {boolean} [options.includeFeatureAreas] - Overrides the --include-feature-areas flag.
 * @param {object} [options.resolvedSource] - Used as-is instead of calling loadSource().
 * @param {string} [options.source] - Overrides the --source <name> argument.
 * @returns {Promise<void>}
 */
export async function main(options) {
    console.log("=== ai-literacy-framework — Documentation Fetcher ===\n");
    const cliArgs = process.argv.slice(2);
    const hasFlag = (flag) => cliArgs.includes(flag);
    // Explicit options take precedence over argv flags.
    const includeFeatureAreas = options?.includeFeatureAreas ?? hasFlag("--include-feature-areas");
    const includeCorpus = options?.includeCorpus ?? hasFlag("--include-corpus");
    // The value after --source is consulted only when options.source is absent.
    let sourceName = options?.source;
    if (sourceName == null) {
        const flagAt = cliArgs.indexOf("--source");
        sourceName = flagAt === -1 ? undefined : cliArgs[flagAt + 1];
    }
    // A pre-resolved source skips the name-based lookup entirely.
    const source = options?.resolvedSource ?? loadSource(sourceName);
    console.log(` Source: ${sourceName ?? "default (production)"}`);
    console.log(` Base URL: ${source.baseUrl}`);
    if (source.dataset) {
        console.log(` Dataset: ${source.dataset}`);
    }
    if (source.perspective) {
        console.log(` Perspective: ${source.perspective}`);
    }
    const documentIds = source.documentIds;
    if (documentIds && documentIds.length > 0) {
        console.log(` Documents: ${documentIds.length} document ID(s)`);
        for (const documentId of documentIds) {
            console.log(` ${documentId}`);
        }
    }
    if (source.urls.length > 0) {
        console.log(` URLs: ${source.urls.length} direct URL(s)`);
        for (const url of source.urls) {
            console.log(` ${url}`);
        }
    }
    console.log();
    if (includeFeatureAreas) {
        await generateFeatureAreaContexts(source);
    }
    // Canonical contexts — only tasks that list canonical docs are fetched.
    const allTasks = await new YamlTaskSource(ROOT).loadTasks();
    const tasksWithDocs = allTasks.filter((task) => task.canonicalDocs.length > 0);
    if (tasksWithDocs.length > 0) {
        console.log("\nGenerating canonical (gold-retrieval) contexts...\n");
        const result = await new SanityDocFetcher(ROOT).fetch(tasksWithDocs, source);
        // Write metadata files inline (CLI mode).
        const contextsDir = join(ROOT, "contexts");
        mkdirSync(contextsDir, { recursive: true });
        const toJson = (value) => JSON.stringify(value, null, 2);
        if (result.metadata?.manifest) {
            writeFileSync(join(contextsDir, "document-manifest.json"), toJson(result.metadata.manifest));
            console.log(`\n 📋 Document manifest: ${result.metadata.manifest.length} docs → contexts/document-manifest.json`);
        }
        // Remaining metadata files share one shape: write the JSON when present, then log.
        const optionalMetadata = [
            {
                key: "releaseImpact",
                file: "release-impact.json",
                notice: " 📄 Release impact written to contexts/release-impact.json",
            },
            {
                key: "documentOverlay",
                file: "document-overlay.json",
                notice: " 📄 Document overlay written to contexts/document-overlay.json",
            },
            {
                key: "urlFetch",
                file: "url-fetch.json",
                notice: " 📄 URL fetch metadata written to contexts/url-fetch.json",
            },
        ];
        for (const { key, file, notice } of optionalMetadata) {
            const payload = result.metadata?.[key];
            if (payload) {
                writeFileSync(join(contextsDir, file), toJson(payload));
                console.log(notice);
            }
        }
        console.log(`\n Canonical contexts: ${result.contexts.length} tasks`);
        for (const context of result.contexts) {
            console.log(` ${context.taskId}: ${context.slugs.length} doc(s), ~${context.tokenCount ?? 0} tokens`);
        }
    }
    if (includeCorpus) {
        await generateFullCorpus(source);
    }
    console.log("\nDone!");
}
162
+ // ---------------------------------------------------------------------------
163
+ // Main — only run when invoked directly
164
+ // ---------------------------------------------------------------------------
165
// Start the CLI when the process was launched with this file (source or
// compiled); a rejected main() is reported and ends the process with status 1.
if (["fetch-docs.ts", "fetch-docs.js"].some((entry) => process.argv[1]?.endsWith(entry))) {
    main().catch((err) => {
        console.error("Fatal error:", err);
        process.exit(1);
    });
}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * lib/generate-configs.ts — DEPRECATED re-export shim.
3
+ *
4
+ * The real implementation has moved to pipeline/generate-configs.ts.
5
+ * This shim preserves backward compatibility for:
6
+ * - Direct CLI invocation: `tsx src/lib/generate-configs.ts`
7
+ * - Test imports that haven't been updated yet
8
+ *
9
+ * TODO: Update all importers to use pipeline/generate-configs.ts, then delete this file.
10
+ *
11
+ * @deprecated Import from ../pipeline/generate-configs.js instead.
12
+ */
13
+ export { discoverTaskFiles, extractModelName, extractProvider, generateConfigs, loadPrompts, mergeConfig, modelMatchesMode, type GenerateConfigsOptions, } from "../pipeline/generate-configs.js";
14
+ import type { GenerateConfigsOptions } from "../pipeline/generate-configs.js";
15
+ /**
16
+ * Legacy main() entry point — wraps generateConfigs() with env var fallbacks.
17
+ *
18
+ * Only used for direct CLI invocation (`tsx src/lib/generate-configs.ts`).
19
+ * Command handlers and orchestration steps should call generateConfigs() directly.
20
+ *
21
+ * @deprecated Use generateConfigs() from pipeline/generate-configs.ts instead.
22
+ */
23
+ export declare function main(options?: Omit<GenerateConfigsOptions, "rootDir"> & {
24
+ rootDir?: string;
25
+ }): void;
@@ -0,0 +1,42 @@
1
+ /**
2
+ * lib/generate-configs.ts — DEPRECATED re-export shim.
3
+ *
4
+ * The real implementation has moved to pipeline/generate-configs.ts.
5
+ * This shim preserves backward compatibility for:
6
+ * - Direct CLI invocation: `tsx src/lib/generate-configs.ts`
7
+ * - Test imports that haven't been updated yet
8
+ *
9
+ * TODO: Update all importers to use pipeline/generate-configs.ts, then delete this file.
10
+ *
11
+ * @deprecated Import from ../pipeline/generate-configs.js instead.
12
+ */
13
+ import { dirname, resolve } from "path";
14
+ import { fileURLToPath } from "url";
15
+ // Re-export everything from the real implementation
16
+ export { discoverTaskFiles, extractModelName, extractProvider, generateConfigs, loadPrompts, mergeConfig, modelMatchesMode, } from "../pipeline/generate-configs.js";
17
+ import { generateConfigs } from "../pipeline/generate-configs.js";
18
+ const __dirname = dirname(fileURLToPath(import.meta.url));
19
+ const ROOT = resolve(__dirname, "../..");
20
/**
 * Legacy CLI wrapper around generateConfigs().
 *
 * Forwards the given options unchanged, filling in two defaults:
 * `rootDir` falls back to ROOT, and `source` falls back to the first CLI
 * argument (process.argv[2]) and then to the DOC_SOURCE environment variable.
 *
 * @param {object} [options] - Same fields as generateConfigs(), with `rootDir` optional.
 * @deprecated Use generateConfigs() from pipeline/generate-configs.ts instead.
 */
export function main(options) {
    const given = options ?? {};
    const rootDir = given.rootDir ?? ROOT;
    const source = given.source ?? process.argv[2] ?? process.env.DOC_SOURCE;
    generateConfigs({
        rootDir,
        allowedOrigins: given.allowedOrigins,
        filter: given.filter,
        resolvedSource: given.resolvedSource,
        searchMode: given.searchMode,
        source,
    });
}
38
// Run main() only when the process was launched with this file (source or
// compiled), so importing the module for tests has no side effect.
if (["generate-configs.ts", "generate-configs.js"].some((entry) => process.argv[1]?.endsWith(entry))) {
    main();
}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * lib/grader-api.ts — DEPRECATED re-export shim.
3
+ *
4
+ * The real implementation has moved to pipeline/grader-api.ts.
5
+ * This shim preserves backward compatibility for:
6
+ * - Direct CLI invocation and other lib/ files that import from here
7
+ * - Test imports that haven't been updated yet
8
+ *
9
+ * TODO: Update all importers to use pipeline/grader-api.ts, then delete this file.
10
+ *
11
+ * @deprecated Import from ../pipeline/grader-api.js instead.
12
+ */
13
+ export { extractScore, gradeOnce, resolveProvider, } from "../pipeline/grader-api.js";
14
+ /**
15
+ * Backward-compatible loadGraderModel that uses EVAL_ROOT.
16
+ * @deprecated Use loadGraderModel(rootDir) from pipeline/grader-api.js instead.
17
+ */
18
+ export declare function loadGraderModel(): {
19
+ id: string;
20
+ label: string;
21
+ };
@@ -0,0 +1,34 @@
1
+ /**
2
+ * lib/grader-api.ts — DEPRECATED re-export shim.
3
+ *
4
+ * The real implementation has moved to pipeline/grader-api.ts.
5
+ * This shim preserves backward compatibility for:
6
+ * - Direct CLI invocation and other lib/ files that import from here
7
+ * - Test imports that haven't been updated yet
8
+ *
9
+ * TODO: Update all importers to use pipeline/grader-api.ts, then delete this file.
10
+ *
11
+ * @deprecated Import from ../pipeline/grader-api.js instead.
12
+ */
13
+ import { config as dotenvConfig } from "dotenv";
14
+ import { existsSync } from "fs";
15
+ import { dirname, resolve } from "path";
16
+ import { fileURLToPath } from "url";
17
+ // Re-export everything from the real implementation
18
+ export { extractScore, gradeOnce, resolveProvider, } from "../pipeline/grader-api.js";
19
+ import { loadGraderModel as _loadGraderModel } from "../pipeline/grader-api.js";
20
+ const __dirname = dirname(fileURLToPath(import.meta.url));
21
+ const EVAL_ROOT = resolve(__dirname, "..", "..");
22
+ // Load root .env (two levels above packages/eval/) so API keys are available
23
+ // even when this module is invoked via `tsx` from packages/eval/.
24
+ const rootEnvPath = resolve(EVAL_ROOT, "..", "..", ".env");
25
+ if (existsSync(rootEnvPath)) {
26
+ dotenvConfig({ override: true, path: rootEnvPath });
27
+ }
28
/**
 * Zero-argument form of loadGraderModel kept for existing callers; it
 * supplies EVAL_ROOT as the root directory.
 *
 * @returns The value produced by the pipeline loadGraderModel(EVAL_ROOT).
 * @deprecated Use loadGraderModel(rootDir) from pipeline/grader-api.js instead.
 */
export function loadGraderModel() {
    const graderModel = _loadGraderModel(EVAL_ROOT);
    return graderModel;
}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * lib/grader-compare.ts — DEPRECATED re-export shim.
3
+ *
4
+ * The real implementation has moved to pipeline/grader-compare-runner.ts.
5
+ * The pure analysis functions live in pipeline/grader-comparison.ts.
6
+ * This shim preserves backward compatibility for direct CLI invocation.
7
+ *
8
+ * TODO: Update all importers to use pipeline/ modules, then delete this file.
9
+ *
10
+ * @deprecated Import from ../pipeline/grader-compare-runner.js instead.
11
+ */
12
+ import "dotenv/config";
13
+ export { formatComparisonReport, runGraderCompare, type GraderCompareRunnerOptions, } from "../pipeline/grader-compare-runner.js";
14
+ export { compareGraders, type GraderComparison, type GraderScore, type GraderScoreSet, } from "../pipeline/grader-comparison.js";
15
+ /**
16
+ * Legacy main() entry point.
17
+ * @deprecated Use runGraderCompare() from pipeline/grader-compare-runner.js instead.
18
+ */
19
+ export declare function main(): Promise<void>;
@@ -0,0 +1,91 @@
1
+ /**
2
+ * lib/grader-compare.ts — DEPRECATED re-export shim.
3
+ *
4
+ * The real implementation has moved to pipeline/grader-compare-runner.ts.
5
+ * The pure analysis functions live in pipeline/grader-comparison.ts.
6
+ * This shim preserves backward compatibility for direct CLI invocation.
7
+ *
8
+ * TODO: Update all importers to use pipeline/ modules, then delete this file.
9
+ *
10
+ * @deprecated Import from ../pipeline/grader-compare-runner.js instead.
11
+ */
12
+ // oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
13
+ import "dotenv/config";
14
+ import { dirname, resolve } from "path";
15
+ import { fileURLToPath } from "url";
16
+ import { load } from "js-yaml";
17
+ import { existsSync, readFileSync } from "fs";
18
+ // Re-export from pipeline modules
19
+ export { formatComparisonReport, runGraderCompare, } from "../pipeline/grader-compare-runner.js";
20
+ export { compareGraders, } from "../pipeline/grader-comparison.js";
21
+ import { runGraderCompare } from "../pipeline/grader-compare-runner.js";
22
+ const __dirname = dirname(fileURLToPath(import.meta.url));
23
+ const ROOT = resolve(__dirname, "..", "..");
24
/**
 * Read grader-compare settings from process.argv.
 *
 * Candidates come from repeated `--candidate <id>` arguments; when none are
 * given they are read from the "grader-candidates" list in
 * <ROOT>/config/models.yaml, or left empty if that file does not exist.
 * A candidate's label defaults to the text after the last ":" in its id.
 *
 * @returns {{candidates: Array<{id: string, label: string}>, format: string,
 *   outputPath: (string|undefined), resultsPath: string}}
 */
function parseCliArgs() {
    const argv = process.argv.slice(2);
    // Value following the first occurrence of --<name>, if one exists.
    const optionValue = (name) => {
        const at = argv.indexOf(`--${name}`);
        return at !== -1 && at + 1 < argv.length ? argv[at + 1] : undefined;
    };
    // Values following every occurrence of --<name>, in argv order.
    const optionValues = (name) => {
        const flag = `--${name}`;
        return argv.flatMap((arg, i) => (arg === flag && i + 1 < argv.length ? [argv[i + 1]] : []));
    };
    const requestedIds = optionValues("candidate");
    let candidates = [];
    if (requestedIds.length > 0) {
        candidates = requestedIds.map((id) => ({
            id,
            label: id.split(":").pop() ?? id,
        }));
    }
    else {
        const modelsPath = resolve(ROOT, "config", "models.yaml");
        if (existsSync(modelsPath)) {
            const data = load(readFileSync(modelsPath, "utf-8"));
            const configured = data?.["grader-candidates"] ?? [];
            candidates = configured.map((entry) => ({
                id: entry.id,
                label: entry.label ?? entry.id.split(":").pop() ?? entry.id,
            }));
        }
    }
    return {
        candidates,
        format: optionValue("format") ?? "table",
        outputPath: optionValue("output"),
        resultsPath: optionValue("results") ?? "results/latest/eval-results.json",
    };
}
70
/**
 * Legacy CLI entry point: parses process.argv and hands the result to
 * runGraderCompare() with ROOT as the root directory.
 *
 * @returns {Promise<void>}
 * @deprecated Use runGraderCompare() from pipeline/grader-compare-runner.js instead.
 */
export async function main() {
    const cli = parseCliArgs();
    await runGraderCompare({
        candidates: cli.candidates,
        format: cli.format,
        outputPath: cli.outputPath,
        resultsPath: cli.resultsPath,
        rootDir: ROOT,
    });
}
84
// Start the CLI when the process was launched with this file (source or
// compiled); a rejected main() is reported and ends the process with status 1.
if (["grader-compare.ts", "grader-compare.js"].some((entry) => process.argv[1]?.endsWith(entry))) {
    main().catch((err) => {
        console.error("❌ Fatal error:", err);
        process.exit(1);
    });
}
@@ -0,0 +1,27 @@
1
+ /**
2
+ * lib/grader-consistency.ts — DEPRECATED re-export shim.
3
+ *
4
+ * The real implementation has moved to pipeline/grader-consistency-runner.ts.
5
+ * The pure analysis functions live in pipeline/grader-consistency.ts.
6
+ * This shim preserves backward compatibility for direct CLI invocation.
7
+ *
8
+ * TODO: Update all importers to use pipeline/ modules, then delete this file.
9
+ *
10
+ * @deprecated Import from ../pipeline/grader-consistency-runner.js instead.
11
+ */
12
+ import "dotenv/config";
13
+ export { extractGradingJudgments, formatConsistencyReport, runGraderConsistency, type GraderConsistencyRunnerOptions, } from "../pipeline/grader-consistency-runner.js";
14
+ export { analyzeConsistency, type GraderConsistency, type ReplicatedGrading, } from "../pipeline/grader-consistency.js";
15
+ /** Options for the legacy main() function; when an options object is passed, main() does not parse CLI arguments. */
16
+ export interface GraderConsistencyOptions {
17
+ /** Number of additional grading replications (default: 5) */
18
+ replications?: number;
19
+ /** Path to eval-results.json (default: results/latest/eval-results.json under the directory two levels above this module) */
20
+ resultsPath?: string;
21
+ }
22
+ /**
23
+ * Legacy main() entry point — wraps runGraderConsistency() with CLI arg parsing.
24
+ *
25
+ * @deprecated Use runGraderConsistency() from pipeline/grader-consistency-runner.js instead.
26
+ */
27
+ export declare function main(options?: GraderConsistencyOptions): Promise<void>;
@@ -0,0 +1,79 @@
1
+ /**
2
+ * lib/grader-consistency.ts — DEPRECATED re-export shim.
3
+ *
4
+ * The real implementation has moved to pipeline/grader-consistency-runner.ts.
5
+ * The pure analysis functions live in pipeline/grader-consistency.ts.
6
+ * This shim preserves backward compatibility for direct CLI invocation.
7
+ *
8
+ * TODO: Update all importers to use pipeline/ modules, then delete this file.
9
+ *
10
+ * @deprecated Import from ../pipeline/grader-consistency-runner.js instead.
11
+ */
12
+ // oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
13
+ import "dotenv/config";
14
+ import { dirname, join, resolve } from "path";
15
+ import { fileURLToPath } from "url";
16
+ // Re-export from pipeline modules
17
+ export { extractGradingJudgments, formatConsistencyReport, runGraderConsistency, } from "../pipeline/grader-consistency-runner.js";
18
+ export { analyzeConsistency, } from "../pipeline/grader-consistency.js";
19
+ import { runGraderConsistency } from "../pipeline/grader-consistency-runner.js";
20
+ const __dirname = dirname(fileURLToPath(import.meta.url));
21
+ const ROOT = resolve(__dirname, "..", "..");
22
+ // ---------------------------------------------------------------------------
23
+ // CLI argument parsing (used when called from CLI)
24
+ // ---------------------------------------------------------------------------
25
/**
 * Read grader-consistency settings from process.argv.
 *
 * Prints usage and exits with status 0 when a help flag is present.
 *
 * @returns {{replications: number, resultsPath: string}} Parsed settings;
 *   `replications` defaults to 5 and `resultsPath` to
 *   <ROOT>/results/latest/eval-results.json.
 * @throws {Error} When the --replications value is not a non-negative integer.
 */
function parseCliArgs() {
    const args = process.argv.slice(2);
    function getOption(name) {
        const idx = args.indexOf(`--${name}`);
        return idx !== -1 && idx + 1 < args.length ? args[idx + 1] : undefined;
    }
    // The usage text advertises "-h", which the previous "--h"-only check never
    // matched. "--h" stays accepted so existing invocations keep working.
    const showHelp = ["--help", "-h", "--h"].some((flag) => args.includes(flag));
    if (showHelp) {
        console.log(`
Usage: pnpm grader-consistency [options]

Measure grader consistency by re-grading existing eval responses N times.

Options:
--replications <n> Number of additional grading replications (default: 5)
--results <path> Path to eval-results.json (default: results/latest/eval-results.json)
--help, -h Show this help
`);
        process.exit(0);
    }
    const replicationsStr = getOption("replications") ?? "5";
    // Reject bad input here: a NaN would pass through main()'s `?? 5` fallback
    // (NaN is not nullish) and reach the runner unchanged.
    if (!/^\d+$/.test(replicationsStr)) {
        throw new Error(`Invalid --replications value "${replicationsStr}": expected a non-negative integer`);
    }
    return {
        replications: Number.parseInt(replicationsStr, 10),
        resultsPath: getOption("results") ??
            join(ROOT, "results", "latest", "eval-results.json"),
    };
}
55
/**
 * Legacy entry point around runGraderConsistency().
 *
 * When `options` is passed, CLI arguments are not parsed; otherwise the
 * settings come from parseCliArgs(). Anything still unset falls back to
 * 5 replications and <ROOT>/results/latest/eval-results.json.
 *
 * @param {{replications?: number, resultsPath?: string}} [options]
 * @returns {Promise<void>}
 * @deprecated Use runGraderConsistency() from pipeline/grader-consistency-runner.js instead.
 */
export async function main(options) {
    const fromCli = options ? {} : parseCliArgs();
    const fromCaller = options ?? {};
    const defaults = {
        replications: 5,
        resultsPath: join(ROOT, "results", "latest", "eval-results.json"),
    };
    await runGraderConsistency({
        replications: fromCaller.replications ?? fromCli.replications ?? defaults.replications,
        resultsPath: fromCaller.resultsPath ?? fromCli.resultsPath ?? defaults.resultsPath,
        rootDir: ROOT,
    });
}
72
// Start the CLI when the process was launched with this file (source or
// compiled); a rejected main() is reported and ends the process with status 1.
if (["grader-consistency.ts", "grader-consistency.js"].some((entry) => process.argv[1]?.endsWith(entry))) {
    main().catch((err) => {
        console.error("❌ Fatal error:", err);
        process.exit(1);
    });
}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * lib/grader-sensitivity.ts — DEPRECATED re-export shim.
3
+ *
4
+ * The real implementation has moved to pipeline/grader-sensitivity-runner.ts.
5
+ * The pure analysis functions live in pipeline/grader-sensitivity.ts.
6
+ * This shim preserves backward compatibility for direct CLI invocation.
7
+ *
8
+ * TODO: Update all importers to use pipeline/ modules, then delete this file.
9
+ *
10
+ * @deprecated Import from ../pipeline/grader-sensitivity-runner.js instead.
11
+ */
12
+ import "dotenv/config";
13
+ export { formatSensitivityReport, runGraderSensitivity, type GraderSensitivityRunnerOptions, } from "../pipeline/grader-sensitivity-runner.js";
14
+ export { analyzeSensitivity, type GraderSensitivityResult, type SensitivityPair, } from "../pipeline/grader-sensitivity.js";
15
+ /**
16
+ * Legacy main() entry point.
17
+ * @deprecated Use runGraderSensitivity() from pipeline/grader-sensitivity-runner.js instead.
18
+ */
19
+ export declare function main(): Promise<void>;