akm-cli 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (333) hide show
  1. package/CHANGELOG.md +66 -0
  2. package/dist/{cli.js → src/cli.js} +712 -34
  3. package/dist/{commands → src/commands}/config-cli.js +47 -4
  4. package/dist/src/commands/distill.js +283 -0
  5. package/dist/src/commands/events.js +108 -0
  6. package/dist/src/commands/history.js +191 -0
  7. package/dist/{commands → src/commands}/installed-stashes.js +1 -1
  8. package/dist/src/commands/proposal.js +119 -0
  9. package/dist/src/commands/propose.js +171 -0
  10. package/dist/src/commands/reflect.js +193 -0
  11. package/dist/{commands → src/commands}/registry-search.js +71 -7
  12. package/dist/{commands → src/commands}/remember.js +12 -0
  13. package/dist/{commands → src/commands}/search.js +104 -4
  14. package/dist/{commands → src/commands}/self-update.js +4 -3
  15. package/dist/{commands → src/commands}/show.js +73 -0
  16. package/dist/{commands → src/commands}/source-add.js +5 -1
  17. package/dist/{commands → src/commands}/source-manage.js +7 -1
  18. package/dist/{core → src/core}/asset-ref.js +5 -5
  19. package/dist/{core → src/core}/asset-spec.js +12 -0
  20. package/dist/{core → src/core}/common.js +1 -1
  21. package/dist/{core → src/core}/config.js +203 -121
  22. package/dist/{core → src/core}/errors.js +4 -0
  23. package/dist/src/core/events.js +239 -0
  24. package/dist/src/core/lesson-lint.js +86 -0
  25. package/dist/src/core/proposals.js +406 -0
  26. package/dist/src/core/warn.js +72 -0
  27. package/dist/{core → src/core}/write-source.js +80 -5
  28. package/dist/{indexer → src/indexer}/db-search.js +114 -24
  29. package/dist/{indexer → src/indexer}/db.js +76 -23
  30. package/dist/{indexer → src/indexer}/file-context.js +0 -3
  31. package/dist/src/indexer/graph-boost.js +179 -0
  32. package/dist/src/indexer/graph-extraction.js +212 -0
  33. package/dist/{indexer → src/indexer}/indexer.js +88 -7
  34. package/dist/{indexer → src/indexer}/matchers.js +1 -1
  35. package/dist/src/indexer/memory-inference.js +263 -0
  36. package/dist/{indexer → src/indexer}/metadata.js +111 -3
  37. package/dist/{indexer → src/indexer}/search-source.js +4 -2
  38. package/dist/src/integrations/agent/config.js +292 -0
  39. package/dist/src/integrations/agent/detect.js +94 -0
  40. package/dist/src/integrations/agent/index.js +17 -0
  41. package/dist/src/integrations/agent/profiles.js +65 -0
  42. package/dist/src/integrations/agent/prompts.js +167 -0
  43. package/dist/src/integrations/agent/spawn.js +272 -0
  44. package/dist/{integrations → src/integrations}/github.js +9 -3
  45. package/dist/{integrations → src/integrations}/lockfile.js +0 -26
  46. package/dist/{llm → src/llm}/client.js +33 -2
  47. package/dist/{llm → src/llm}/embedders/remote.js +37 -3
  48. package/dist/src/llm/feature-gate.js +108 -0
  49. package/dist/src/llm/graph-extract.js +107 -0
  50. package/dist/src/llm/index-passes.js +35 -0
  51. package/dist/src/llm/memory-infer.js +86 -0
  52. package/dist/{output → src/output}/cli-hints.js +15 -2
  53. package/dist/{output → src/output}/renderers.js +63 -2
  54. package/dist/src/output/shapes.js +523 -0
  55. package/dist/src/output/text.js +1116 -0
  56. package/dist/{registry → src/registry}/build-index.js +19 -8
  57. package/dist/{registry → src/registry}/factory.js +0 -8
  58. package/dist/{registry → src/registry}/providers/static-index.js +6 -3
  59. package/dist/{registry → src/registry}/resolve.js +68 -2
  60. package/dist/{setup → src/setup}/setup.js +52 -5
  61. package/dist/{sources → src/sources}/providers/git.js +7 -15
  62. package/dist/{wiki → src/wiki}/wiki.js +54 -6
  63. package/dist/{workflows → src/workflows}/runs.js +37 -3
  64. package/dist/tests/add-website-source.test.js +119 -0
  65. package/dist/tests/agent/agent-config-loader.test.js +70 -0
  66. package/dist/tests/agent/agent-config.test.js +221 -0
  67. package/dist/tests/agent/agent-detect.test.js +100 -0
  68. package/dist/tests/agent/agent-spawn.test.js +234 -0
  69. package/dist/tests/agent-output.test.js +186 -0
  70. package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +103 -0
  71. package/dist/tests/architecture/agent-spawn-seam.test.js +193 -0
  72. package/dist/tests/architecture/llm-stateless-seam.test.js +112 -0
  73. package/dist/tests/asset-ref.test.js +192 -0
  74. package/dist/tests/asset-registry.test.js +103 -0
  75. package/dist/tests/asset-spec.test.js +241 -0
  76. package/dist/tests/bench/attribution.test.js +996 -0
  77. package/dist/tests/bench/cleanup-sigint.test.js +83 -0
  78. package/dist/tests/bench/cleanup.js +234 -0
  79. package/dist/tests/bench/cleanup.test.js +166 -0
  80. package/dist/tests/bench/cli.js +1018 -0
  81. package/dist/tests/bench/cli.test.js +445 -0
  82. package/dist/tests/bench/compare.test.js +556 -0
  83. package/dist/tests/bench/corpus.js +317 -0
  84. package/dist/tests/bench/corpus.test.js +258 -0
  85. package/dist/tests/bench/doctor.js +525 -0
  86. package/dist/tests/bench/driver.js +401 -0
  87. package/dist/tests/bench/driver.test.js +584 -0
  88. package/dist/tests/bench/environment.js +233 -0
  89. package/dist/tests/bench/environment.test.js +199 -0
  90. package/dist/tests/bench/evolve-metrics.js +179 -0
  91. package/dist/tests/bench/evolve-metrics.test.js +187 -0
  92. package/dist/tests/bench/evolve.js +647 -0
  93. package/dist/tests/bench/evolve.test.js +624 -0
  94. package/dist/tests/bench/failure-modes.test.js +349 -0
  95. package/dist/tests/bench/feedback-integrity.test.js +457 -0
  96. package/dist/tests/bench/leakage.test.js +228 -0
  97. package/dist/tests/bench/learning-curve.test.js +134 -0
  98. package/dist/tests/bench/metrics.js +2395 -0
  99. package/dist/tests/bench/metrics.test.js +1150 -0
  100. package/dist/tests/bench/no-os-tmpdir-invariant.test.js +43 -0
  101. package/dist/tests/bench/opencode-config.js +194 -0
  102. package/dist/tests/bench/opencode-config.test.js +370 -0
  103. package/dist/tests/bench/report.js +1885 -0
  104. package/dist/tests/bench/report.test.js +1038 -0
  105. package/dist/tests/bench/run-config.js +355 -0
  106. package/dist/tests/bench/run-config.test.js +298 -0
  107. package/dist/tests/bench/run-curate-test.js +32 -0
  108. package/dist/tests/bench/run-failing-tasks.js +56 -0
  109. package/dist/tests/bench/run-full-bench.js +51 -0
  110. package/dist/tests/bench/run-items36-targeted.js +69 -0
  111. package/dist/tests/bench/run-nano-quick.js +42 -0
  112. package/dist/tests/bench/run-waveg-targeted.js +62 -0
  113. package/dist/tests/bench/runner.js +699 -0
  114. package/dist/tests/bench/runner.test.js +958 -0
  115. package/dist/tests/bench/search-bridge.test.js +331 -0
  116. package/dist/tests/bench/tmp.js +131 -0
  117. package/dist/tests/bench/trajectory.js +116 -0
  118. package/dist/tests/bench/trajectory.test.js +127 -0
  119. package/dist/tests/bench/verifier.js +114 -0
  120. package/dist/tests/bench/verifier.test.js +118 -0
  121. package/dist/tests/bench/workflow-evaluator.js +557 -0
  122. package/dist/tests/bench/workflow-evaluator.test.js +421 -0
  123. package/dist/tests/bench/workflow-spec.js +345 -0
  124. package/dist/tests/bench/workflow-spec.test.js +363 -0
  125. package/dist/tests/bench/workflow-trace.js +472 -0
  126. package/dist/tests/bench/workflow-trace.test.js +254 -0
  127. package/dist/tests/benchmark-search-quality.js +536 -0
  128. package/dist/tests/benchmark-suite.js +1441 -0
  129. package/dist/tests/capture-cli.test.js +112 -0
  130. package/dist/tests/cli-errors.test.js +204 -0
  131. package/dist/tests/commands/events.test.js +370 -0
  132. package/dist/tests/commands/history.test.js +418 -0
  133. package/dist/tests/commands/import.test.js +103 -0
  134. package/dist/tests/commands/proposal-cli.test.js +209 -0
  135. package/dist/tests/commands/reflect-propose-cli.test.js +333 -0
  136. package/dist/tests/commands/remember.test.js +97 -0
  137. package/dist/tests/commands/scope-flags.test.js +300 -0
  138. package/dist/tests/commands/search.test.js +537 -0
  139. package/dist/tests/commands/show-indexer-parity.test.js +117 -0
  140. package/dist/tests/commands/show.test.js +294 -0
  141. package/dist/tests/common.test.js +266 -0
  142. package/dist/tests/completions.test.js +142 -0
  143. package/dist/tests/config-cli.test.js +193 -0
  144. package/dist/tests/config-llm-features.test.js +139 -0
  145. package/dist/tests/config.test.js +569 -0
  146. package/dist/tests/contracts/migration-baseline.test.js +43 -0
  147. package/dist/tests/contracts/reflect-propose-envelope.test.js +139 -0
  148. package/dist/tests/contracts/spec-helpers.js +46 -0
  149. package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +228 -0
  150. package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +56 -0
  151. package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +34 -0
  152. package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +94 -0
  153. package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +39 -0
  154. package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +44 -0
  155. package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +47 -0
  156. package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +40 -0
  157. package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +58 -0
  158. package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +34 -0
  159. package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +75 -0
  160. package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +36 -0
  161. package/dist/tests/core/write-source.test.js +366 -0
  162. package/dist/tests/curate-command.test.js +87 -0
  163. package/dist/tests/db-scoring.test.js +201 -0
  164. package/dist/tests/db.test.js +654 -0
  165. package/dist/tests/distill-cli-flag.test.js +208 -0
  166. package/dist/tests/distill.test.js +515 -0
  167. package/dist/tests/docker-install.test.js +120 -0
  168. package/dist/tests/e2e.test.js +1419 -0
  169. package/dist/tests/embedder.test.js +340 -0
  170. package/dist/tests/embedding-model-config.test.js +379 -0
  171. package/dist/tests/feedback-command.test.js +172 -0
  172. package/dist/tests/file-context.test.js +552 -0
  173. package/dist/tests/fixtures/scripts/git/summarize-diff.js +9 -0
  174. package/dist/tests/fixtures/scripts/lint/eslint-check.js +7 -0
  175. package/dist/tests/fixtures/stashes/load.js +166 -0
  176. package/dist/tests/fixtures/stashes/load.test.js +97 -0
  177. package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +12 -0
  178. package/dist/tests/frontmatter.test.js +190 -0
  179. package/dist/tests/fts-field-weighting.test.js +254 -0
  180. package/dist/tests/fuzzy-search.test.js +230 -0
  181. package/dist/tests/git-provider-clone.test.js +45 -0
  182. package/dist/tests/github.test.js +161 -0
  183. package/dist/tests/graph-boost-ranking.test.js +305 -0
  184. package/dist/tests/graph-extraction.test.js +282 -0
  185. package/dist/tests/helpers/usage-events.js +8 -0
  186. package/dist/tests/index-pass-llm.test.js +161 -0
  187. package/dist/tests/indexer.test.js +570 -0
  188. package/dist/tests/info-command.test.js +166 -0
  189. package/dist/tests/init.test.js +69 -0
  190. package/dist/tests/install-script.test.js +246 -0
  191. package/dist/tests/integration/agent-real-profile.test.js +94 -0
  192. package/dist/tests/issue-36-repro.test.js +304 -0
  193. package/dist/tests/issues-191-194.test.js +160 -0
  194. package/dist/tests/lesson-lint.test.js +111 -0
  195. package/dist/tests/llm-client.test.js +115 -0
  196. package/dist/tests/llm-feature-gate.test.js +151 -0
  197. package/dist/tests/llm.test.js +139 -0
  198. package/dist/tests/lockfile.test.js +216 -0
  199. package/dist/tests/manifest.test.js +205 -0
  200. package/dist/tests/markdown.test.js +126 -0
  201. package/dist/tests/matchers-unit.test.js +189 -0
  202. package/dist/tests/memory-inference.test.js +299 -0
  203. package/dist/tests/merge-scoring.test.js +136 -0
  204. package/dist/tests/metadata.test.js +313 -0
  205. package/dist/tests/migration-help.test.js +89 -0
  206. package/dist/tests/origin-resolve.test.js +124 -0
  207. package/dist/tests/output-baseline.test.js +218 -0
  208. package/dist/tests/output-shapes-unit.test.js +478 -0
  209. package/dist/tests/parallel-search.test.js +272 -0
  210. package/dist/tests/parameter-metadata.test.js +365 -0
  211. package/dist/tests/paths.test.js +177 -0
  212. package/dist/tests/progressive-disclosure.test.js +280 -0
  213. package/dist/tests/proposals.test.js +279 -0
  214. package/dist/tests/proposed-quality.test.js +271 -0
  215. package/dist/tests/provider-registry.test.js +32 -0
  216. package/dist/tests/ranking-regression.test.js +548 -0
  217. package/dist/tests/reflect-propose.test.js +455 -0
  218. package/dist/tests/registry-build-index.test.js +394 -0
  219. package/dist/tests/registry-cli.test.js +290 -0
  220. package/dist/tests/registry-index-v2.test.js +430 -0
  221. package/dist/tests/registry-install.test.js +728 -0
  222. package/dist/tests/registry-providers/parity.test.js +189 -0
  223. package/dist/tests/registry-providers/skills-sh.test.js +309 -0
  224. package/dist/tests/registry-providers/static-index.test.js +238 -0
  225. package/dist/tests/registry-resolve.test.js +126 -0
  226. package/dist/tests/registry-search.test.js +923 -0
  227. package/dist/tests/remember-frontmatter.test.js +378 -0
  228. package/dist/tests/remember-unit.test.js +123 -0
  229. package/dist/tests/ripgrep-install.test.js +251 -0
  230. package/dist/tests/ripgrep-resolve.test.js +108 -0
  231. package/dist/tests/ripgrep.test.js +163 -0
  232. package/dist/tests/save-command.test.js +94 -0
  233. package/dist/tests/save-trust-qa-fixes.test.js +270 -0
  234. package/dist/tests/scoring-pipeline.test.js +648 -0
  235. package/dist/tests/search-include-proposed-cli.test.js +118 -0
  236. package/dist/tests/self-update.test.js +442 -0
  237. package/dist/tests/semantic-search-e2e.test.js +512 -0
  238. package/dist/tests/semantic-status.test.js +471 -0
  239. package/dist/tests/setup-run.integration.js +877 -0
  240. package/dist/tests/setup-wizard.test.js +198 -0
  241. package/dist/tests/setup.test.js +131 -0
  242. package/dist/tests/source-add.test.js +11 -0
  243. package/dist/tests/source-clone.test.js +254 -0
  244. package/dist/tests/source-manage.test.js +366 -0
  245. package/dist/tests/source-providers/filesystem.test.js +82 -0
  246. package/dist/tests/source-providers/git.test.js +252 -0
  247. package/dist/tests/source-providers/website.test.js +128 -0
  248. package/dist/tests/source-qa-fixes.test.js +286 -0
  249. package/dist/tests/source-registry.test.js +350 -0
  250. package/dist/tests/source-resolve.test.js +100 -0
  251. package/dist/tests/source-source.test.js +281 -0
  252. package/dist/tests/source.test.js +533 -0
  253. package/dist/tests/tar-utils-scan.test.js +73 -0
  254. package/dist/tests/toggle-components.test.js +73 -0
  255. package/dist/tests/usage-telemetry.test.js +265 -0
  256. package/dist/tests/utility-scoring.test.js +558 -0
  257. package/dist/tests/vault-load-error.test.js +78 -0
  258. package/dist/tests/vault-qa-fixes.test.js +194 -0
  259. package/dist/tests/vault.test.js +429 -0
  260. package/dist/tests/vector-search.test.js +608 -0
  261. package/dist/tests/walker.test.js +252 -0
  262. package/dist/tests/wave2-cluster-bc.test.js +228 -0
  263. package/dist/tests/wave2-cluster-d.test.js +180 -0
  264. package/dist/tests/wave2-cluster-e.test.js +179 -0
  265. package/dist/tests/wiki-qa-fixes.test.js +270 -0
  266. package/dist/tests/wiki.test.js +529 -0
  267. package/dist/tests/workflow-cli.test.js +271 -0
  268. package/dist/tests/workflow-markdown.test.js +171 -0
  269. package/dist/tests/workflow-path-escape.test.js +132 -0
  270. package/dist/tests/workflow-qa-fixes.test.js +395 -0
  271. package/dist/tests/workflows/indexer-rejection.test.js +213 -0
  272. package/docs/README.md +8 -0
  273. package/docs/migration/release-notes/0.7.0.md +244 -0
  274. package/package.json +2 -2
  275. package/dist/core/warn.js +0 -27
  276. package/dist/output/shapes.js +0 -212
  277. package/dist/output/text.js +0 -520
  278. /package/dist/{commands → src/commands}/completions.js +0 -0
  279. /package/dist/{commands → src/commands}/curate.js +0 -0
  280. /package/dist/{commands → src/commands}/info.js +0 -0
  281. /package/dist/{commands → src/commands}/init.js +0 -0
  282. /package/dist/{commands → src/commands}/install-audit.js +0 -0
  283. /package/dist/{commands → src/commands}/migration-help.js +0 -0
  284. /package/dist/{commands → src/commands}/source-clone.js +0 -0
  285. /package/dist/{commands → src/commands}/vault.js +0 -0
  286. /package/dist/{core → src/core}/asset-registry.js +0 -0
  287. /package/dist/{core → src/core}/frontmatter.js +0 -0
  288. /package/dist/{core → src/core}/markdown.js +0 -0
  289. /package/dist/{core → src/core}/paths.js +0 -0
  290. /package/dist/{indexer → src/indexer}/manifest.js +0 -0
  291. /package/dist/{indexer → src/indexer}/search-fields.js +0 -0
  292. /package/dist/{indexer → src/indexer}/semantic-status.js +0 -0
  293. /package/dist/{indexer → src/indexer}/usage-events.js +0 -0
  294. /package/dist/{indexer → src/indexer}/walker.js +0 -0
  295. /package/dist/{llm → src/llm}/embedder.js +0 -0
  296. /package/dist/{llm → src/llm}/embedders/cache.js +0 -0
  297. /package/dist/{llm → src/llm}/embedders/local.js +0 -0
  298. /package/dist/{llm → src/llm}/embedders/types.js +0 -0
  299. /package/dist/{llm → src/llm}/metadata-enhance.js +0 -0
  300. /package/dist/{output → src/output}/context.js +0 -0
  301. /package/dist/{registry → src/registry}/create-provider-registry.js +0 -0
  302. /package/dist/{registry → src/registry}/origin-resolve.js +0 -0
  303. /package/dist/{registry → src/registry}/providers/index.js +0 -0
  304. /package/dist/{registry → src/registry}/providers/skills-sh.js +0 -0
  305. /package/dist/{registry → src/registry}/providers/types.js +0 -0
  306. /package/dist/{registry → src/registry}/types.js +0 -0
  307. /package/dist/{setup → src/setup}/detect.js +0 -0
  308. /package/dist/{setup → src/setup}/ripgrep-install.js +0 -0
  309. /package/dist/{setup → src/setup}/ripgrep-resolve.js +0 -0
  310. /package/dist/{setup → src/setup}/steps.js +0 -0
  311. /package/dist/{sources → src/sources}/include.js +0 -0
  312. /package/dist/{sources → src/sources}/provider-factory.js +0 -0
  313. /package/dist/{sources → src/sources}/provider.js +0 -0
  314. /package/dist/{sources → src/sources}/providers/filesystem.js +0 -0
  315. /package/dist/{sources → src/sources}/providers/index.js +0 -0
  316. /package/dist/{sources → src/sources}/providers/install-types.js +0 -0
  317. /package/dist/{sources → src/sources}/providers/npm.js +0 -0
  318. /package/dist/{sources → src/sources}/providers/provider-utils.js +0 -0
  319. /package/dist/{sources → src/sources}/providers/sync-from-ref.js +0 -0
  320. /package/dist/{sources → src/sources}/providers/tar-utils.js +0 -0
  321. /package/dist/{sources → src/sources}/providers/website.js +0 -0
  322. /package/dist/{sources → src/sources}/resolve.js +0 -0
  323. /package/dist/{sources → src/sources}/types.js +0 -0
  324. /package/dist/{templates → src/templates}/wiki-templates.js +0 -0
  325. /package/dist/{version.js → src/version.js} +0 -0
  326. /package/dist/{workflows → src/workflows}/authoring.js +0 -0
  327. /package/dist/{workflows → src/workflows}/cli.js +0 -0
  328. /package/dist/{workflows → src/workflows}/db.js +0 -0
  329. /package/dist/{workflows → src/workflows}/document-cache.js +0 -0
  330. /package/dist/{workflows → src/workflows}/parser.js +0 -0
  331. /package/dist/{workflows → src/workflows}/renderer.js +0 -0
  332. /package/dist/{workflows → src/workflows}/schema.js +0 -0
  333. /package/dist/{workflows → src/workflows}/validator.js +0 -0
@@ -0,0 +1,355 @@
1
+ /**
2
+ * akm-bench run-config loader.
3
+ *
4
+ * A bench run config (`tests/bench/configs/*.json`) is a single-file
5
+ * description of a utility/evolve invocation: providers, default model,
6
+ * tasks, arms, seeds, budgets, parallel, baseline. Loading a config
7
+ * resolves the providers file (from explicit `providers` / `providersRef`
8
+ * fields, the `BENCH_OPENCODE_CONFIG` env var, or
9
+ * `${XDG_CONFIG_HOME:-~/.config}/akm/bench-providers.json`), looks up the
10
+ * effective default model, and resolves the task selector + baseline file
11
+ * paths.
12
+ *
13
+ * Self-contained — does not import from `src/` so the bench framework
14
+ * stays liftable to a standalone repo.
15
+ */
16
+ import fs from "node:fs";
17
+ import os from "node:os";
18
+ import path from "node:path";
19
+ import { listTasks, loadTask } from "./corpus";
20
+ import { BenchConfigError, loadOpencodeProviders, } from "./opencode-config";
21
+ import { benchMkdtemp } from "./tmp";
22
/**
 * Resolve a path string supporting `~` expansion and `${VAR}` / `$VAR`
 * env-var expansion. Relative paths are resolved against `baseDir`.
 *
 * Variables that are unset expand to the empty string. Both variable forms
 * are expanded in a single pass, so text substituted from one variable is
 * never re-scanned for further `$` references (a value of `$OTHER` stays
 * literal instead of being expanded a second time).
 *
 * Tilde expansion runs after variable expansion and only handles `~` alone
 * or a leading `~/`; `~user/` is not supported.
 *
 * @param {string} value   Raw path string (typically from a config file).
 * @param {string} baseDir Directory that relative results are resolved against.
 * @returns {string} The expanded path; returned as-is when already absolute,
 *   otherwise resolved against `baseDir`.
 */
export function resolvePathString(value, baseDir) {
    // One alternation covers `${VAR}` (group 1) and `$VAR` (group 2); exactly
    // one of the two groups is defined for any given match.
    const expanded = value.replace(/\$\{([A-Za-z_][A-Za-z0-9_]*)\}|\$([A-Za-z_][A-Za-z0-9_]*)/g, (_match, braced, bare) => process.env[braced ?? bare] ?? "");
    let candidate = expanded;
    if (expanded === "~") {
        candidate = os.homedir();
    }
    else if (expanded.startsWith("~/")) {
        candidate = path.join(os.homedir(), expanded.slice(2));
    }
    return path.isAbsolute(candidate) ? candidate : path.resolve(baseDir, candidate);
}
40
/**
 * Default per-operator providers location:
 * `${XDG_CONFIG_HOME:-~/.config}/akm/bench-providers.json`.
 *
 * `XDG_CONFIG_HOME` is honoured only when it is a non-empty absolute path.
 * The XDG Base Directory Specification requires relative values to be
 * treated as invalid and ignored, so an unset, empty, or relative value
 * falls back to `~/.config` rather than resolving against the current
 * working directory.
 *
 * @returns {string} Path of the per-operator `bench-providers.json` file.
 */
export function defaultUserProvidersPath() {
    const xdg = process.env.XDG_CONFIG_HOME;
    const configRoot = xdg && path.isAbsolute(xdg) ? xdg : path.join(os.homedir(), ".config");
    return path.join(configRoot, "akm", "bench-providers.json");
}
46
/**
 * Resolve the providers file using the §A discovery chain and load it.
 *
 * 1. `BENCH_OPENCODE_CONFIG` env var (relative values are resolved against
 *    the current working directory).
 * 2. `providers` inline in the config (delegated to
 *    `materialiseInlineProviders`).
 * 3. `providersRef` in the config (with tilde / env-var expansion, relative
 *    to `configDir`).
 * 4. `${XDG_CONFIG_HOME:-~/.config}/akm/bench-providers.json`, if it exists.
 * 5. Repo-local fallbacks next to this module:
 *    `../fixtures/bench/opencode-providers.local.json`, then
 *    `../fixtures/bench/opencode-providers.json`.
 * 6. Throw — the caller is expected to map this to exit code 2.
 *
 * @param {object} config    Parsed bench run config (reads `providers` and
 *   `providersRef`).
 * @param {string} configDir Directory of the config file; base for a
 *   relative `providersRef`.
 * @returns Whatever `loadOpencodeProviders` / `materialiseInlineProviders`
 *   return for the first source in the chain that applies.
 * @throws {BenchConfigError} When both `providers` and `providersRef` are
 *   set, or when no source in the chain yields a providers file.
 */
export function resolveProviders(config, configDir) {
    // 1. BENCH_OPENCODE_CONFIG env var wins over everything in the config.
    const envPath = process.env.BENCH_OPENCODE_CONFIG;
    if (envPath && envPath.length > 0) {
        return loadOpencodeProviders(path.isAbsolute(envPath) ? envPath : path.resolve(envPath));
    }
    const hasInline = config.providers !== undefined;
    const hasRef = config.providersRef !== undefined;
    // 2. Inline providers in the config (mutually exclusive with providersRef).
    if (hasInline) {
        if (hasRef) {
            throw new BenchConfigError("bench run config: only one of `providers` or `providersRef` may be set", true);
        }
        return materialiseInlineProviders(config);
    }
    // 3. Explicit providersRef.
    if (hasRef) {
        return loadOpencodeProviders(resolvePathString(config.providersRef, configDir));
    }
    // 4. Per-operator default location.
    const userPath = defaultUserProvidersPath();
    if (fs.existsSync(userPath)) {
        return loadOpencodeProviders(userPath);
    }
    // 5. Repo-local fallbacks. The gitignored `.local.json` overlay is tried
    //    before the committed fixture so an operator's local overrides
    //    survive a `git pull` without needing a config edit.
    //    NOTE(review): `__dirname` is only evaluated when this step is
    //    reached; it is not defined in native Node ES modules, so this relies
    //    on the runtime or bundler providing it — confirm.
    for (const fileName of ["opencode-providers.local.json", "opencode-providers.json"]) {
        const candidate = path.resolve(__dirname, "..", "fixtures", "bench", fileName);
        if (fs.existsSync(candidate)) {
            return loadOpencodeProviders(candidate);
        }
    }
    // 6. No providers found.
    throw new BenchConfigError(`bench run config: no opencode providers found. Set \`providers\` or \`providersRef\` in the config, set BENCH_OPENCODE_CONFIG, or create ${userPath}.`, true);
}
97
/**
 * Build a loaded-providers result from an inline `providers` map.
 *
 * The inline map is wrapped in a synthetic providers file
 * (`schemaVersion: 1`, `providers`, and `defaultModel` when set), written to
 * a short-lived temp file, and loaded through `loadOpencodeProviders`, so
 * the on-disk loader's validation is reused rather than duplicated here
 * (the original author's note says this keeps the credential scan
 * co-located in opencode-config).
 *
 * The temp file is created with mode 0o600 inside a directory from
 * `benchMkdtemp`, and that directory is removed in a `finally` block whether
 * or not loading succeeds. Removal is best-effort: a cleanup failure is
 * ignored so it never masks the load result or the load error.
 *
 * @param {object} config Parsed bench run config; reads `providers` and
 *   `defaultModel`.
 * @returns The loader's result with `source` overridden to `"<inline>"`.
 * @throws {BenchConfigError} When `providers` is null, an array, or not an
 *   object. Errors from `loadOpencodeProviders` propagate unchanged.
 */
function materialiseInlineProviders(config) {
    const { providers, defaultModel } = config;
    if (providers === null || typeof providers !== "object" || Array.isArray(providers)) {
        throw new BenchConfigError("bench run config: `providers` must be an object", false);
    }
    // Synthetic providers file; `defaultModel` is only emitted when present.
    const file = { schemaVersion: 1, providers };
    if (defaultModel !== undefined) {
        file.defaultModel = defaultModel;
    }
    // Per #276: bench tmp dirs MUST live under `${AKM_CACHE_DIR}/bench/`,
    // never the OS-default tmp root. `benchMkdtemp` is the drop-in.
    const tmpDir = benchMkdtemp("akm-bench-inline-");
    const tmpPath = path.join(tmpDir, "providers.json");
    try {
        fs.writeFileSync(tmpPath, JSON.stringify(file), { mode: 0o600 });
        return { ...loadOpencodeProviders(tmpPath), source: "<inline>" };
    }
    finally {
        try {
            fs.rmSync(tmpDir, { recursive: true, force: true });
        }
        catch {
            // best-effort cleanup
        }
    }
}
138
/**
 * Load + validate a baseline JSON file: `{ taskId: passRate (0..1) }`.
 *
 * Every value must be a finite number in the closed range [0, 1]; the first
 * offending entry aborts the load. The result is built with
 * `Object.fromEntries`, so every key in the file becomes an own property of
 * the returned object — including a key such as `__proto__`, which a plain
 * `out[key] = value` assignment would silently drop.
 *
 * @param {string} absPath Path of the baseline file to read.
 * @returns {Object<string, number>} Map of task id to pass rate.
 * @throws {BenchConfigError} When the file cannot be read, is not valid
 *   JSON, is not a JSON object, or contains an out-of-range / non-numeric
 *   entry.
 */
export function loadBaseline(absPath) {
    let raw;
    try {
        raw = fs.readFileSync(absPath, "utf8");
    }
    catch (err) {
        throw new BenchConfigError(`bench run config: cannot read baseline file "${absPath}": ${err instanceof Error ? err.message : String(err)}`, true);
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (err) {
        throw new BenchConfigError(`bench run config: baseline file "${absPath}" is not valid JSON: ${err instanceof Error ? err.message : String(err)}`, false);
    }
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        throw new BenchConfigError(`bench run config: baseline file "${absPath}" must be a JSON object of taskId → passRate`, false);
    }
    const entries = Object.entries(parsed);
    for (const [taskId, passRate] of entries) {
        const inRange = typeof passRate === "number" && Number.isFinite(passRate) && passRate >= 0 && passRate <= 1;
        if (!inRange) {
            throw new BenchConfigError(`bench run config: baseline entry ${JSON.stringify(taskId)} in "${absPath}" must be a number in [0, 1]; got ${JSON.stringify(passRate)}`, false);
        }
    }
    // fromEntries defines own data properties, so no key can hit a setter
    // inherited from Object.prototype.
    return Object.fromEntries(entries);
}
166
/**
 * Resolve the `tasks` selector to a concrete task list plus a slice label
 * for the report's `corpus.slice` field.
 *
 * Selector forms:
 *  - omitted (`undefined`): every task from `listTasks()`, slice `"all"`.
 *  - `"all"` / `"train"` / `"eval"`: `listTasks` with the matching slice
 *    filter (no filter for `"all"`); the slice label is the selector.
 *  - a string containing `/`: a single task id, loaded via `loadTask`.
 *  - any other string: a domain name, matched against each task's `domain`.
 *  - an array: a non-empty list of task ids, each loaded via `loadTask`.
 *
 * For everything except the three slice keywords the slice label is `"all"`.
 *
 * @param {string | string[] | undefined} selector The config's `tasks` value.
 * @returns {{ tasks: Array, slice: string }} Resolved tasks and slice label.
 * @throws {BenchConfigError} When a task id or domain matches nothing, or
 *   the array is empty. For failed id lookups the underlying `loadTask`
 *   error is attached as `cause` so the real reason is not lost.
 */
export function resolveTasks(selector) {
    // Wrap a failed `loadTask` in the user-facing error while keeping the
    // original failure reachable for diagnostics.
    const loadOrFail = (id) => {
        try {
            return loadTask(id);
        }
        catch (err) {
            const failure = new BenchConfigError(`bench run config: tasks: no task matched "${id}"`, true);
            failure.cause = err;
            throw failure;
        }
    };
    // Default = "all" when the field is omitted entirely.
    if (selector === undefined) {
        return { tasks: listTasks(), slice: "all" };
    }
    if (typeof selector === "string") {
        // Slice keywords.
        if (selector === "all" || selector === "train" || selector === "eval") {
            const filter = selector === "all" ? {} : { slice: selector };
            return { tasks: listTasks(filter), slice: selector };
        }
        // Single task id ("domain/name").
        if (selector.includes("/")) {
            return { tasks: [loadOrFail(selector)], slice: "all" };
        }
        // Domain name (no slash).
        const all = listTasks();
        const inDomain = all.filter((t) => t.domain === selector);
        if (inDomain.length === 0) {
            const known = [...new Set(all.map((t) => t.domain))].sort().join(", ") || "(none)";
            throw new BenchConfigError(`bench run config: tasks: no task matched domain "${selector}". Available domains: ${known}`, true);
        }
        return { tasks: inDomain, slice: "all" };
    }
    // Array of task ids.
    if (selector.length === 0) {
        throw new BenchConfigError("bench run config: tasks: array must be non-empty", true);
    }
    const resolved = [];
    for (const id of selector) {
        resolved.push(loadOrFail(id));
    }
    return { tasks: resolved, slice: "all" };
}
214
/**
 * Validate the parsed config against the v1 schema (in-code, no JSON
 * Schema runtime — keeps the bench self-contained). Throws BenchConfigError
 * on the first violation.
 *
 * Checks, in order:
 *  - the root is a JSON object and `schemaVersion` is exactly 1;
 *  - every top-level key is in the known-field allowlist;
 *  - `providers` and `providersRef` are not both set;
 *  - `providersRef`, when set, is a string (it is later expanded as a path
 *    string, which would otherwise fail with a raw TypeError);
 *  - `tasks`, when set, is a string or an array of strings;
 *  - `arms`, when set, is a non-empty array of `"noakm"` / `"akm"` /
 *    `"synthetic"`;
 *  - `seeds`, `budgetTokens`, `budgetWallMs`, `parallel`, when set, are
 *    positive integers.
 *
 * Other allowed fields are not type-checked here.
 *
 * @param {unknown} parsed Result of `JSON.parse` on the config file.
 * @param {string} source  Label used in error messages (the config path).
 * @returns {object} The same `parsed` object, once validated.
 * @throws {BenchConfigError} On the first violation found.
 */
function validateConfig(parsed, source) {
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        throw new BenchConfigError(`bench run config: root of ${source} must be a JSON object`, false);
    }
    const obj = parsed;
    if (obj.schemaVersion !== 1) {
        throw new BenchConfigError(`bench run config: ${source}: unsupported schemaVersion ${JSON.stringify(obj.schemaVersion)}; expected 1`, false);
    }
    const allowed = new Set([
        "$schema",
        "schemaVersion",
        "name",
        "description",
        "providers",
        "providersRef",
        "defaultModel",
        "tasks",
        "arms",
        "seeds",
        "budgetTokens",
        "budgetWallMs",
        "parallel",
        "forceParallel",
        "baseline",
    ]);
    const unknownKey = Object.keys(obj).find((key) => !allowed.has(key));
    if (unknownKey !== undefined) {
        throw new BenchConfigError(`bench run config: ${source}: unknown field "${unknownKey}"`, false);
    }
    if (obj.providers !== undefined && obj.providersRef !== undefined) {
        throw new BenchConfigError(`bench run config: ${source}: only one of "providers" or "providersRef" may be set`, true);
    }
    // `providersRef` is expanded with string methods downstream; reject other
    // types here so the failure is a config error, not a TypeError.
    if (obj.providersRef !== undefined && typeof obj.providersRef !== "string") {
        throw new BenchConfigError(`bench run config: ${source}: "providersRef" must be a string`, false);
    }
    if (obj.tasks !== undefined) {
        const isList = Array.isArray(obj.tasks);
        if (typeof obj.tasks !== "string" && !isList) {
            throw new BenchConfigError(`bench run config: ${source}: "tasks" must be a string or array of strings`, false);
        }
        if (isList && obj.tasks.some((t) => typeof t !== "string")) {
            throw new BenchConfigError(`bench run config: ${source}: every entry in "tasks" must be a string`, false);
        }
    }
    if (obj.arms !== undefined) {
        if (!Array.isArray(obj.arms) || obj.arms.length === 0) {
            throw new BenchConfigError(`bench run config: ${source}: "arms" must be a non-empty array`, false);
        }
        for (const arm of obj.arms) {
            if (arm !== "noakm" && arm !== "akm" && arm !== "synthetic") {
                throw new BenchConfigError(`bench run config: ${source}: invalid arm ${JSON.stringify(arm)}; expected one of "noakm", "akm", "synthetic"`, false);
            }
        }
    }
    for (const numField of ["seeds", "budgetTokens", "budgetWallMs", "parallel"]) {
        const val = obj[numField];
        if (val === undefined) {
            continue;
        }
        if (typeof val !== "number" || !Number.isInteger(val) || val < 1) {
            throw new BenchConfigError(`bench run config: ${source}: "${numField}" must be a positive integer`, false);
        }
    }
    return obj;
}
284
/**
 * Load and resolve a bench run config from disk.
 *
 * Reads and JSON-parses the file, validates it with `validateConfig`, then
 * resolves providers, model, tasks and the optional baseline before applying
 * the CLI overrides.
 *
 * Model precedence is the BENCH_OPENCODE_MODEL env var, then the config's
 * `defaultModel`, then the resolved providers' `defaultModel`. An empty
 * string at any level counts as "not set" and falls through to the next.
 *
 * @param configPath Absolute or relative path to the config JSON file.
 * @param overrides CLI-derived overrides applied on top of the config:
 *   `tasksList` narrows the config's task selection to the listed ids,
 *   `seedsPerArm` and `parallel` take precedence over the config's `seeds`
 *   and `parallel` when they are not null/undefined.
 * @returns An object with `source`, `name`, `providers`, `model`, `tasks`,
 *   `arms` and `slice`, plus `seedsPerArm`, `budgetTokens`, `budgetWallMs`,
 *   `parallel`, `forceParallel` and `baselineByTaskId` only when set.
 * @throws BenchConfigError when the file is missing, unreadable or not valid
 *   JSON, when no model can be determined, when a `tasksList` id is not in
 *   the config's selection, or when the final selection is empty.
 */
export function loadBenchRunConfig(configPath, overrides = {}) {
    const absPath = path.isAbsolute(configPath) ? configPath : path.resolve(configPath);
    if (!fs.existsSync(absPath)) {
        throw new BenchConfigError(`bench run config: file not found: ${absPath}`, true);
    }
    let raw;
    try {
        raw = fs.readFileSync(absPath, "utf8");
    }
    catch (err) {
        throw new BenchConfigError(`bench run config: cannot read ${absPath}: ${err instanceof Error ? err.message : String(err)}`, true);
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (err) {
        throw new BenchConfigError(`bench run config: ${absPath}: invalid JSON: ${err instanceof Error ? err.message : String(err)}`, false);
    }
    const config = validateConfig(parsed, absPath);
    const configDir = path.dirname(absPath);
    const providers = resolveProviders(config, configDir);
    // First candidate that is actually set wins. Empty strings are skipped at
    // every level so an empty `defaultModel` in the config cannot mask a
    // usable default from the providers (the env var was already treated
    // this way).
    const model = [process.env.BENCH_OPENCODE_MODEL, config.defaultModel, providers.defaultModel]
        .find((candidate) => candidate !== undefined && candidate !== null && candidate !== "");
    if (!model) {
        throw new BenchConfigError(`bench run config: ${absPath}: no model specified. Set "defaultModel" in the config, set "defaultModel" in the providers file, or set BENCH_OPENCODE_MODEL.`, true);
    }
    // Resolve tasks (with optional CLI list override).
    let resolved = resolveTasks(config.tasks);
    if (overrides.tasksList && overrides.tasksList.length > 0) {
        const requestedIds = new Set(overrides.tasksList);
        const selectedIds = new Set(resolved.tasks.map((t) => t.id));
        // The override may only narrow the config's selection: an id that the
        // config did not select is an error, not a silent no-op.
        const missing = overrides.tasksList.filter((id) => !selectedIds.has(id));
        if (missing.length > 0) {
            throw new BenchConfigError(`bench run config: --tasks override: no task in the config matched ${JSON.stringify(missing.join(", "))}`, true);
        }
        resolved = {
            tasks: resolved.tasks.filter((t) => requestedIds.has(t.id)),
            slice: resolved.slice,
        };
    }
    if (resolved.tasks.length === 0) {
        throw new BenchConfigError(`bench run config: ${absPath}: task selector matched zero tasks`, true);
    }
    let baselineByTaskId;
    if (config.baseline) {
        // Baseline paths are resolved against the config file's directory.
        const baselinePath = resolvePathString(config.baseline, configDir);
        baselineByTaskId = loadBaseline(baselinePath);
    }
    const arms = config.arms ?? ["noakm", "akm"];
    const seedsPerArm = overrides.seedsPerArm ?? config.seeds;
    const parallel = overrides.parallel ?? config.parallel;
    // Fall back to the file name without its extension when no name is given.
    const name = config.name ?? path.basename(absPath, path.extname(absPath));
    return {
        source: absPath,
        name,
        providers,
        model,
        tasks: resolved.tasks,
        arms,
        // Optional keys are only present when they carry a value.
        ...(seedsPerArm !== undefined ? { seedsPerArm } : {}),
        ...(config.budgetTokens !== undefined ? { budgetTokens: config.budgetTokens } : {}),
        ...(config.budgetWallMs !== undefined ? { budgetWallMs: config.budgetWallMs } : {}),
        ...(parallel !== undefined ? { parallel } : {}),
        ...(config.forceParallel ? { forceParallel: true } : {}),
        ...(baselineByTaskId ? { baselineByTaskId } : {}),
        slice: resolved.slice,
    };
}
@@ -0,0 +1,298 @@
1
+ /**
2
+ * Unit tests for the bench run-config loader (`tests/bench/run-config.ts`).
3
+ *
4
+ * Covers the parts that don't require spawning a process:
5
+ * - Schema validation (unknown fields, missing schemaVersion, bad arms).
6
+ * - Path resolution (~ expansion, ${VAR} expansion, relative vs absolute).
7
+ * - Provider discovery chain (env > inline > providersRef > XDG default).
8
+ * - Baseline-file loading + range checks.
9
+ * - Task selector resolution (slice / domain / id / array).
10
+ *
11
+ * The CLI-level dispatch is exercised by `cli.test.ts` via spawned bench
12
+ * runs — keep those for end-to-end coverage; this file is unit-grade.
13
+ */
14
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
15
+ import fs from "node:fs";
16
+ import os from "node:os";
17
+ import path from "node:path";
18
+ import { defaultUserProvidersPath, loadBaseline, loadBenchRunConfig, resolvePathString } from "./run-config";
19
+ import { benchMkdtemp } from "./tmp";
20
// Repository root, two directories above this test file.
const REPO_ROOT = path.resolve(__dirname, "..", "..");
// Per-test scratch directory and the env values captured before each test.
let workDir;
let savedEnv;
beforeEach(() => {
    // Per #276 invariant: bench tmp dirs live under `${AKM_CACHE_DIR}/bench/`,
    // never the OS-default tmp root. `benchMkdtemp` is the drop-in.
    workDir = benchMkdtemp("akm-bench-runconfig-test-");
    // Snapshot the variables the tests touch so the after-hook can put them back.
    const { BENCH_OPENCODE_CONFIG, BENCH_OPENCODE_MODEL, AKM_TEST_VAR } = process.env;
    savedEnv = { BENCH_OPENCODE_CONFIG, BENCH_OPENCODE_MODEL, AKM_TEST_VAR };
    // Start every test without the two loader-affecting overrides.
    for (const key of ["BENCH_OPENCODE_CONFIG", "BENCH_OPENCODE_MODEL"]) {
        delete process.env[key];
    }
});
35
afterEach(() => {
    // Drop the scratch directory, then restore each captured env variable:
    // re-assign the ones that had a value, remove the ones that did not.
    fs.rmSync(workDir, { recursive: true, force: true });
    Object.entries(savedEnv).forEach(([key, previous]) => {
        if (previous !== undefined) {
            process.env[key] = previous;
            return;
        }
        delete process.env[key];
    });
});
44
/**
 * Write a minimal providers JSON file for tests, creating parent directories
 * as needed.
 *
 * @param filePath Destination path of the providers file.
 * @param defaultModel Value stored under `defaultModel` (defaults to "p/m").
 */
function writeProvidersFile(filePath, defaultModel = "p/m") {
    const payload = {
        schemaVersion: 1,
        defaultModel,
        providers: { p: { npm: "@ai-sdk/openai-compatible" } },
    };
    const parentDir = path.dirname(filePath);
    fs.mkdirSync(parentDir, { recursive: true });
    fs.writeFileSync(filePath, JSON.stringify(payload));
}
52
describe("resolvePathString", () => {
    // Every case resolves against the same base directory.
    const BASE_DIR = "/work";
    test("resolves a relative path against the supplied base dir", () => {
        const actual = resolvePathString("foo.json", BASE_DIR);
        expect(actual).toBe("/work/foo.json");
    });
    test("returns absolute paths unchanged", () => {
        const actual = resolvePathString("/abs/path.json", BASE_DIR);
        expect(actual).toBe("/abs/path.json");
    });
    test("expands `~` to the operator's home dir", () => {
        const expected = path.join(os.homedir(), ".config/akm/foo.json");
        expect(resolvePathString("~/.config/akm/foo.json", BASE_DIR)).toBe(expected);
    });
    test("expands env-var references", () => {
        process.env.AKM_TEST_VAR = "/somewhere";
        // The dollar sign is interpolated separately so the source never
        // contains a literal `\${VAR}` inside a string, which biome's
        // noTemplateCurlyInString rule would flag.
        const input = `${"$"}{AKM_TEST_VAR}/providers.json`;
        const actual = resolvePathString(input, BASE_DIR);
        expect(actual).toBe("/somewhere/providers.json");
    });
});
70
describe("defaultUserProvidersPath", () => {
    // Run `body` with XDG_CONFIG_HOME set to `value` (or removed when `value`
    // is undefined), restoring the previous state afterwards even if the
    // body throws.
    const withXdgConfigHome = (value, body) => {
        const previous = process.env.XDG_CONFIG_HOME;
        if (value === undefined) {
            delete process.env.XDG_CONFIG_HOME;
        }
        else {
            process.env.XDG_CONFIG_HOME = value;
        }
        try {
            body();
        }
        finally {
            if (previous === undefined) {
                delete process.env.XDG_CONFIG_HOME;
            }
            else {
                process.env.XDG_CONFIG_HOME = previous;
            }
        }
    };
    test("respects XDG_CONFIG_HOME when set", () => {
        withXdgConfigHome("/xdg-test", () => {
            expect(defaultUserProvidersPath()).toBe("/xdg-test/akm/bench-providers.json");
        });
    });
    test("falls back to ~/.config when XDG_CONFIG_HOME is unset", () => {
        withXdgConfigHome(undefined, () => {
            const expected = path.join(os.homedir(), ".config/akm/bench-providers.json");
            expect(defaultUserProvidersPath()).toBe(expected);
        });
    });
});
96
describe("loadBaseline", () => {
    // Serialise `contents` into `fileName` under the per-test work dir and
    // hand back the resulting path.
    const writeJson = (fileName, contents) => {
        const target = path.join(workDir, fileName);
        fs.writeFileSync(target, JSON.stringify(contents));
        return target;
    };
    test("loads a `{ taskId: passRate }` map", () => {
        const filePath = writeJson("baseline.json", { "domain/a": 0.8, "domain/b": 1.0 });
        const loaded = loadBaseline(filePath);
        expect(loaded).toEqual({ "domain/a": 0.8, "domain/b": 1.0 });
    });
    test("rejects pass rates outside [0, 1]", () => {
        const filePath = writeJson("bad.json", { "x/y": 1.5 });
        expect(() => loadBaseline(filePath)).toThrow(/must be a number in \[0, 1\]/);
    });
    test("rejects non-number values", () => {
        const filePath = writeJson("non-number.json", { "x/y": "not a number" });
        expect(() => loadBaseline(filePath)).toThrow(/must be a number/);
    });
});
113
describe("loadBenchRunConfig — schema validation", () => {
    // Write `contents` as JSON to `fileName` inside the per-test work dir and
    // return the path to feed to the loader.
    const writeConfig = (fileName, contents) => {
        const target = path.join(workDir, fileName);
        fs.writeFileSync(target, JSON.stringify(contents));
        return target;
    };
    test("rejects unknown top-level fields", () => {
        const cfgPath = writeConfig("bad.json", { schemaVersion: 1, name: "x", weirdField: 42 });
        expect(() => loadBenchRunConfig(cfgPath)).toThrow(/unknown field "weirdField"/);
    });
    test("rejects missing schemaVersion", () => {
        const cfgPath = writeConfig("noversion.json", { name: "x" });
        expect(() => loadBenchRunConfig(cfgPath)).toThrow(/unsupported schemaVersion/);
    });
    test("rejects providers AND providersRef both set", () => {
        const cfgPath = writeConfig("both.json", {
            schemaVersion: 1,
            providers: { p: { npm: "x" } },
            providersRef: "./other.json",
        });
        expect(() => loadBenchRunConfig(cfgPath)).toThrow(/only one of "providers" or "providersRef"/);
    });
    test("rejects bad arm values", () => {
        const cfgPath = writeConfig("badarm.json", { schemaVersion: 1, arms: ["nope"] });
        expect(() => loadBenchRunConfig(cfgPath)).toThrow(/invalid arm/);
    });
    test("missing config file exits with usage error", () => {
        // Nothing is written here: the path deliberately points at no file.
        const ghostPath = path.join(workDir, "ghost.json");
        expect(() => loadBenchRunConfig(ghostPath)).toThrow(/file not found/);
    });
});
142
describe("loadBenchRunConfig — provider discovery", () => {
    test("BENCH_OPENCODE_CONFIG env var wins over providersRef", () => {
        const envProviders = path.join(workDir, "env-providers.json");
        const refProviders = path.join(workDir, "ref-providers.json");
        writeProvidersFile(envProviders, "env/model");
        writeProvidersFile(refProviders, "ref/model");
        process.env.BENCH_OPENCODE_CONFIG = envProviders;
        const cfgPath = path.join(workDir, "config.json");
        // Only provider resolution is under test here. The selector names a
        // single task id so the load gets past the loader's zero-task check;
        // the id is expected to exist in the bench task corpus.
        fs.writeFileSync(cfgPath, JSON.stringify({
            schemaVersion: 1,
            providersRef: "./ref-providers.json",
            tasks: ["drillbit/backup-policy"],
        }));
        const resolved = loadBenchRunConfig(cfgPath);
        // Both the providers source and the model must come from the env file,
        // not from the file named by `providersRef`.
        expect(resolved.providers.source).toBe(envProviders);
        expect(resolved.model).toBe("env/model");
    });
    test("`providersRef` is resolved relative to the config file", () => {
        const refProviders = path.join(workDir, "subdir", "providers.json");
        writeProvidersFile(refProviders, "ref/model");
        const cfgPath = path.join(workDir, "config.json");
        fs.writeFileSync(cfgPath, JSON.stringify({
            schemaVersion: 1,
            providersRef: "./subdir/providers.json",
            tasks: ["drillbit/backup-policy"],
        }));
        const resolved = loadBenchRunConfig(cfgPath);
        expect(resolved.providers.source).toBe(refProviders);
        expect(resolved.model).toBe("ref/model");
    });
    test("config `defaultModel` overrides the providers file's defaultModel", () => {
        const refProviders = path.join(workDir, "providers.json");
        writeProvidersFile(refProviders, "ref/model");
        const cfgPath = path.join(workDir, "config.json");
        fs.writeFileSync(cfgPath, JSON.stringify({
            schemaVersion: 1,
            providersRef: "./providers.json",
            defaultModel: "config/model",
            tasks: ["drillbit/backup-policy"],
        }));
        const resolved = loadBenchRunConfig(cfgPath);
        expect(resolved.model).toBe("config/model");
    });
    test("BENCH_OPENCODE_MODEL env wins over both", () => {
        const refProviders = path.join(workDir, "providers.json");
        writeProvidersFile(refProviders, "ref/model");
        process.env.BENCH_OPENCODE_MODEL = "env/model";
        const cfgPath = path.join(workDir, "config.json");
        fs.writeFileSync(cfgPath, JSON.stringify({
            schemaVersion: 1,
            providersRef: "./providers.json",
            defaultModel: "config/model",
            tasks: ["drillbit/backup-policy"],
        }));
        const resolved = loadBenchRunConfig(cfgPath);
        expect(resolved.model).toBe("env/model");
    });
});
210
describe("loadBenchRunConfig — task resolution", () => {
    // Write providers.json (default model) plus a config.json that references
    // it, merging `extraFields` after the two fixed keys. Returns the config
    // path.
    const writeConfig = (extraFields) => {
        writeProvidersFile(path.join(workDir, "providers.json"));
        const cfgPath = path.join(workDir, "config.json");
        const contents = { schemaVersion: 1, providersRef: "./providers.json", ...extraFields };
        fs.writeFileSync(cfgPath, JSON.stringify(contents));
        return cfgPath;
    };
    // Ids of the tasks in a resolved config, in resolution order.
    const idsOf = (resolved) => resolved.tasks.map((t) => t.id);
    test("tasks=array selects exactly the listed ids", () => {
        const cfgPath = writeConfig({ tasks: ["drillbit/backup-policy", "drillbit/canary-enable"] });
        const resolved = loadBenchRunConfig(cfgPath);
        expect(idsOf(resolved).sort()).toEqual(["drillbit/backup-policy", "drillbit/canary-enable"]);
    });
    test("tasks=domain matches every task whose domain matches", () => {
        const cfgPath = writeConfig({ tasks: "drillbit" });
        const resolved = loadBenchRunConfig(cfgPath);
        expect(resolved.tasks.length).toBeGreaterThan(0);
        for (const task of resolved.tasks) {
            expect(task.domain).toBe("drillbit");
        }
    });
    test("tasks=single-id matches exactly that task", () => {
        const cfgPath = writeConfig({ tasks: "drillbit/backup-policy" });
        const resolved = loadBenchRunConfig(cfgPath);
        expect(idsOf(resolved)).toEqual(["drillbit/backup-policy"]);
    });
    test("--tasks override (CLI) restricts to a subset of the config's selection", () => {
        const cfgPath = writeConfig({ tasks: ["drillbit/backup-policy", "drillbit/canary-enable"] });
        const resolved = loadBenchRunConfig(cfgPath, { tasksList: ["drillbit/canary-enable"] });
        expect(idsOf(resolved)).toEqual(["drillbit/canary-enable"]);
    });
    test("baseline path is resolved relative to the config file", () => {
        // The baseline sits next to config.json and is referenced with a
        // relative path.
        const baselinePath = path.join(workDir, "baseline.json");
        fs.writeFileSync(baselinePath, JSON.stringify({ "drillbit/backup-policy": 0.8 }));
        const cfgPath = writeConfig({
            tasks: ["drillbit/backup-policy"],
            baseline: "./baseline.json",
        });
        const resolved = loadBenchRunConfig(cfgPath);
        expect(resolved.baselineByTaskId).toEqual({ "drillbit/backup-policy": 0.8 });
    });
});
277
describe("loadBenchRunConfig — committed configs validate", () => {
    // Load one of the config files under tests/bench/configs by file name.
    const loadCommitted = (fileName) => {
        const cfgPath = path.join(REPO_ROOT, "tests", "bench", "configs", fileName);
        return loadBenchRunConfig(cfgPath);
    };
    test("tests/bench/configs/nano-quick.json loads cleanly", () => {
        const { name, arms, seedsPerArm, tasks } = loadCommitted("nano-quick.json");
        expect(name).toBe("nano-quick");
        expect(arms).toEqual(["akm"]);
        expect(seedsPerArm).toBe(2);
        expect(tasks.length).toBe(5);
    });
    test("tests/bench/configs/full.json loads cleanly and carries the baseline", () => {
        const resolved = loadCommitted("full.json");
        expect(resolved.name).toBe("full");
        expect(resolved.baselineByTaskId).toBeDefined();
        expect(typeof resolved.baselineByTaskId?.["drillbit/backup-policy"]).toBe("number");
    });
    test("tests/bench/configs/curate-test.json restricts to one task", () => {
        const resolved = loadCommitted("curate-test.json");
        const ids = resolved.tasks.map((t) => t.id);
        expect(ids).toEqual(["inkwell/configure-scaling"]);
    });
});