akm-cli 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (332) hide show
  1. package/CHANGELOG.md +8 -0
  2. package/dist/{src/cli.js → cli.js} +22 -8
  3. package/dist/{src/commands → commands}/installed-stashes.js +1 -1
  4. package/dist/{src/commands → commands}/source-add.js +1 -1
  5. package/dist/{src/core → core}/common.js +16 -1
  6. package/dist/{src/core → core}/config.js +5 -2
  7. package/dist/{src/indexer → indexer}/db-search.js +16 -1
  8. package/dist/{src/indexer → indexer}/graph-extraction.js +5 -3
  9. package/dist/{src/indexer → indexer}/indexer.js +27 -11
  10. package/dist/{src/indexer → indexer}/memory-inference.js +47 -58
  11. package/dist/{src/indexer → indexer}/search-source.js +1 -1
  12. package/dist/{src/llm → llm}/client.js +61 -1
  13. package/dist/{src/llm → llm}/embedder.js +8 -5
  14. package/dist/{src/llm → llm}/embedders/local.js +8 -2
  15. package/dist/{src/llm → llm}/embedders/remote.js +4 -2
  16. package/dist/{src/llm → llm}/graph-extract.js +4 -4
  17. package/dist/llm/memory-infer.js +114 -0
  18. package/dist/{src/llm → llm}/metadata-enhance.js +2 -2
  19. package/dist/{src/output → output}/cli-hints.js +2 -0
  20. package/dist/{src/setup → setup}/setup.js +30 -20
  21. package/dist/sources/providers/website.js +27 -0
  22. package/dist/{src/sources/providers/website.js → sources/website-ingest.js} +38 -51
  23. package/docs/README.md +7 -0
  24. package/docs/migration/release-notes/0.7.0.md +14 -0
  25. package/package.json +11 -8
  26. package/dist/src/llm/memory-infer.js +0 -86
  27. package/dist/tests/add-website-source.test.js +0 -119
  28. package/dist/tests/agent/agent-config-loader.test.js +0 -70
  29. package/dist/tests/agent/agent-config.test.js +0 -221
  30. package/dist/tests/agent/agent-detect.test.js +0 -100
  31. package/dist/tests/agent/agent-spawn.test.js +0 -234
  32. package/dist/tests/agent-output.test.js +0 -186
  33. package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +0 -103
  34. package/dist/tests/architecture/agent-spawn-seam.test.js +0 -193
  35. package/dist/tests/architecture/llm-stateless-seam.test.js +0 -112
  36. package/dist/tests/asset-ref.test.js +0 -192
  37. package/dist/tests/asset-registry.test.js +0 -103
  38. package/dist/tests/asset-spec.test.js +0 -241
  39. package/dist/tests/bench/attribution.test.js +0 -996
  40. package/dist/tests/bench/cleanup-sigint.test.js +0 -83
  41. package/dist/tests/bench/cleanup.js +0 -234
  42. package/dist/tests/bench/cleanup.test.js +0 -166
  43. package/dist/tests/bench/cli.js +0 -1018
  44. package/dist/tests/bench/cli.test.js +0 -445
  45. package/dist/tests/bench/compare.test.js +0 -556
  46. package/dist/tests/bench/corpus.js +0 -317
  47. package/dist/tests/bench/corpus.test.js +0 -258
  48. package/dist/tests/bench/doctor.js +0 -525
  49. package/dist/tests/bench/driver.js +0 -401
  50. package/dist/tests/bench/driver.test.js +0 -584
  51. package/dist/tests/bench/environment.js +0 -233
  52. package/dist/tests/bench/environment.test.js +0 -199
  53. package/dist/tests/bench/evolve-metrics.js +0 -179
  54. package/dist/tests/bench/evolve-metrics.test.js +0 -187
  55. package/dist/tests/bench/evolve.js +0 -647
  56. package/dist/tests/bench/evolve.test.js +0 -624
  57. package/dist/tests/bench/failure-modes.test.js +0 -349
  58. package/dist/tests/bench/feedback-integrity.test.js +0 -457
  59. package/dist/tests/bench/leakage.test.js +0 -228
  60. package/dist/tests/bench/learning-curve.test.js +0 -134
  61. package/dist/tests/bench/metrics.js +0 -2395
  62. package/dist/tests/bench/metrics.test.js +0 -1150
  63. package/dist/tests/bench/no-os-tmpdir-invariant.test.js +0 -43
  64. package/dist/tests/bench/opencode-config.js +0 -194
  65. package/dist/tests/bench/opencode-config.test.js +0 -370
  66. package/dist/tests/bench/report.js +0 -1885
  67. package/dist/tests/bench/report.test.js +0 -1038
  68. package/dist/tests/bench/run-config.js +0 -355
  69. package/dist/tests/bench/run-config.test.js +0 -298
  70. package/dist/tests/bench/run-curate-test.js +0 -32
  71. package/dist/tests/bench/run-failing-tasks.js +0 -56
  72. package/dist/tests/bench/run-full-bench.js +0 -51
  73. package/dist/tests/bench/run-items36-targeted.js +0 -69
  74. package/dist/tests/bench/run-nano-quick.js +0 -42
  75. package/dist/tests/bench/run-waveg-targeted.js +0 -62
  76. package/dist/tests/bench/runner.js +0 -699
  77. package/dist/tests/bench/runner.test.js +0 -958
  78. package/dist/tests/bench/search-bridge.test.js +0 -331
  79. package/dist/tests/bench/tmp.js +0 -131
  80. package/dist/tests/bench/trajectory.js +0 -116
  81. package/dist/tests/bench/trajectory.test.js +0 -127
  82. package/dist/tests/bench/verifier.js +0 -114
  83. package/dist/tests/bench/verifier.test.js +0 -118
  84. package/dist/tests/bench/workflow-evaluator.js +0 -557
  85. package/dist/tests/bench/workflow-evaluator.test.js +0 -421
  86. package/dist/tests/bench/workflow-spec.js +0 -345
  87. package/dist/tests/bench/workflow-spec.test.js +0 -363
  88. package/dist/tests/bench/workflow-trace.js +0 -472
  89. package/dist/tests/bench/workflow-trace.test.js +0 -254
  90. package/dist/tests/benchmark-search-quality.js +0 -536
  91. package/dist/tests/benchmark-suite.js +0 -1441
  92. package/dist/tests/capture-cli.test.js +0 -112
  93. package/dist/tests/cli-errors.test.js +0 -204
  94. package/dist/tests/commands/events.test.js +0 -370
  95. package/dist/tests/commands/history.test.js +0 -418
  96. package/dist/tests/commands/import.test.js +0 -103
  97. package/dist/tests/commands/proposal-cli.test.js +0 -209
  98. package/dist/tests/commands/reflect-propose-cli.test.js +0 -333
  99. package/dist/tests/commands/remember.test.js +0 -97
  100. package/dist/tests/commands/scope-flags.test.js +0 -300
  101. package/dist/tests/commands/search.test.js +0 -537
  102. package/dist/tests/commands/show-indexer-parity.test.js +0 -117
  103. package/dist/tests/commands/show.test.js +0 -294
  104. package/dist/tests/common.test.js +0 -266
  105. package/dist/tests/completions.test.js +0 -142
  106. package/dist/tests/config-cli.test.js +0 -193
  107. package/dist/tests/config-llm-features.test.js +0 -139
  108. package/dist/tests/config.test.js +0 -569
  109. package/dist/tests/contracts/migration-baseline.test.js +0 -43
  110. package/dist/tests/contracts/reflect-propose-envelope.test.js +0 -139
  111. package/dist/tests/contracts/spec-helpers.js +0 -46
  112. package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +0 -228
  113. package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +0 -56
  114. package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +0 -34
  115. package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +0 -94
  116. package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +0 -39
  117. package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +0 -44
  118. package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +0 -47
  119. package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +0 -40
  120. package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +0 -58
  121. package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +0 -34
  122. package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +0 -75
  123. package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +0 -36
  124. package/dist/tests/core/write-source.test.js +0 -366
  125. package/dist/tests/curate-command.test.js +0 -87
  126. package/dist/tests/db-scoring.test.js +0 -201
  127. package/dist/tests/db.test.js +0 -654
  128. package/dist/tests/distill-cli-flag.test.js +0 -208
  129. package/dist/tests/distill.test.js +0 -515
  130. package/dist/tests/docker-install.test.js +0 -120
  131. package/dist/tests/e2e.test.js +0 -1419
  132. package/dist/tests/embedder.test.js +0 -340
  133. package/dist/tests/embedding-model-config.test.js +0 -379
  134. package/dist/tests/feedback-command.test.js +0 -172
  135. package/dist/tests/file-context.test.js +0 -552
  136. package/dist/tests/fixtures/scripts/git/summarize-diff.js +0 -9
  137. package/dist/tests/fixtures/scripts/lint/eslint-check.js +0 -7
  138. package/dist/tests/fixtures/stashes/load.js +0 -166
  139. package/dist/tests/fixtures/stashes/load.test.js +0 -97
  140. package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +0 -12
  141. package/dist/tests/frontmatter.test.js +0 -190
  142. package/dist/tests/fts-field-weighting.test.js +0 -254
  143. package/dist/tests/fuzzy-search.test.js +0 -230
  144. package/dist/tests/git-provider-clone.test.js +0 -45
  145. package/dist/tests/github.test.js +0 -161
  146. package/dist/tests/graph-boost-ranking.test.js +0 -305
  147. package/dist/tests/graph-extraction.test.js +0 -282
  148. package/dist/tests/helpers/usage-events.js +0 -8
  149. package/dist/tests/index-pass-llm.test.js +0 -161
  150. package/dist/tests/indexer.test.js +0 -570
  151. package/dist/tests/info-command.test.js +0 -166
  152. package/dist/tests/init.test.js +0 -69
  153. package/dist/tests/install-script.test.js +0 -246
  154. package/dist/tests/integration/agent-real-profile.test.js +0 -94
  155. package/dist/tests/issue-36-repro.test.js +0 -304
  156. package/dist/tests/issues-191-194.test.js +0 -160
  157. package/dist/tests/lesson-lint.test.js +0 -111
  158. package/dist/tests/llm-client.test.js +0 -115
  159. package/dist/tests/llm-feature-gate.test.js +0 -151
  160. package/dist/tests/llm.test.js +0 -139
  161. package/dist/tests/lockfile.test.js +0 -216
  162. package/dist/tests/manifest.test.js +0 -205
  163. package/dist/tests/markdown.test.js +0 -126
  164. package/dist/tests/matchers-unit.test.js +0 -189
  165. package/dist/tests/memory-inference.test.js +0 -299
  166. package/dist/tests/merge-scoring.test.js +0 -136
  167. package/dist/tests/metadata.test.js +0 -313
  168. package/dist/tests/migration-help.test.js +0 -89
  169. package/dist/tests/origin-resolve.test.js +0 -124
  170. package/dist/tests/output-baseline.test.js +0 -218
  171. package/dist/tests/output-shapes-unit.test.js +0 -478
  172. package/dist/tests/parallel-search.test.js +0 -272
  173. package/dist/tests/parameter-metadata.test.js +0 -365
  174. package/dist/tests/paths.test.js +0 -177
  175. package/dist/tests/progressive-disclosure.test.js +0 -280
  176. package/dist/tests/proposals.test.js +0 -279
  177. package/dist/tests/proposed-quality.test.js +0 -271
  178. package/dist/tests/provider-registry.test.js +0 -32
  179. package/dist/tests/ranking-regression.test.js +0 -548
  180. package/dist/tests/reflect-propose.test.js +0 -455
  181. package/dist/tests/registry-build-index.test.js +0 -394
  182. package/dist/tests/registry-cli.test.js +0 -290
  183. package/dist/tests/registry-index-v2.test.js +0 -430
  184. package/dist/tests/registry-install.test.js +0 -728
  185. package/dist/tests/registry-providers/parity.test.js +0 -189
  186. package/dist/tests/registry-providers/skills-sh.test.js +0 -309
  187. package/dist/tests/registry-providers/static-index.test.js +0 -238
  188. package/dist/tests/registry-resolve.test.js +0 -126
  189. package/dist/tests/registry-search.test.js +0 -923
  190. package/dist/tests/remember-frontmatter.test.js +0 -378
  191. package/dist/tests/remember-unit.test.js +0 -123
  192. package/dist/tests/ripgrep-install.test.js +0 -251
  193. package/dist/tests/ripgrep-resolve.test.js +0 -108
  194. package/dist/tests/ripgrep.test.js +0 -163
  195. package/dist/tests/save-command.test.js +0 -94
  196. package/dist/tests/save-trust-qa-fixes.test.js +0 -270
  197. package/dist/tests/scoring-pipeline.test.js +0 -648
  198. package/dist/tests/search-include-proposed-cli.test.js +0 -118
  199. package/dist/tests/self-update.test.js +0 -442
  200. package/dist/tests/semantic-search-e2e.test.js +0 -512
  201. package/dist/tests/semantic-status.test.js +0 -471
  202. package/dist/tests/setup-run.integration.js +0 -877
  203. package/dist/tests/setup-wizard.test.js +0 -198
  204. package/dist/tests/setup.test.js +0 -131
  205. package/dist/tests/source-add.test.js +0 -11
  206. package/dist/tests/source-clone.test.js +0 -254
  207. package/dist/tests/source-manage.test.js +0 -366
  208. package/dist/tests/source-providers/filesystem.test.js +0 -82
  209. package/dist/tests/source-providers/git.test.js +0 -252
  210. package/dist/tests/source-providers/website.test.js +0 -128
  211. package/dist/tests/source-qa-fixes.test.js +0 -286
  212. package/dist/tests/source-registry.test.js +0 -350
  213. package/dist/tests/source-resolve.test.js +0 -100
  214. package/dist/tests/source-source.test.js +0 -281
  215. package/dist/tests/source.test.js +0 -533
  216. package/dist/tests/tar-utils-scan.test.js +0 -73
  217. package/dist/tests/toggle-components.test.js +0 -73
  218. package/dist/tests/usage-telemetry.test.js +0 -265
  219. package/dist/tests/utility-scoring.test.js +0 -558
  220. package/dist/tests/vault-load-error.test.js +0 -78
  221. package/dist/tests/vault-qa-fixes.test.js +0 -194
  222. package/dist/tests/vault.test.js +0 -429
  223. package/dist/tests/vector-search.test.js +0 -608
  224. package/dist/tests/walker.test.js +0 -252
  225. package/dist/tests/wave2-cluster-bc.test.js +0 -228
  226. package/dist/tests/wave2-cluster-d.test.js +0 -180
  227. package/dist/tests/wave2-cluster-e.test.js +0 -179
  228. package/dist/tests/wiki-qa-fixes.test.js +0 -270
  229. package/dist/tests/wiki.test.js +0 -529
  230. package/dist/tests/workflow-cli.test.js +0 -271
  231. package/dist/tests/workflow-markdown.test.js +0 -171
  232. package/dist/tests/workflow-path-escape.test.js +0 -132
  233. package/dist/tests/workflow-qa-fixes.test.js +0 -395
  234. package/dist/tests/workflows/indexer-rejection.test.js +0 -213
  235. /package/dist/{src/commands → commands}/completions.js +0 -0
  236. /package/dist/{src/commands → commands}/config-cli.js +0 -0
  237. /package/dist/{src/commands → commands}/curate.js +0 -0
  238. /package/dist/{src/commands → commands}/distill.js +0 -0
  239. /package/dist/{src/commands → commands}/events.js +0 -0
  240. /package/dist/{src/commands → commands}/history.js +0 -0
  241. /package/dist/{src/commands → commands}/info.js +0 -0
  242. /package/dist/{src/commands → commands}/init.js +0 -0
  243. /package/dist/{src/commands → commands}/install-audit.js +0 -0
  244. /package/dist/{src/commands → commands}/migration-help.js +0 -0
  245. /package/dist/{src/commands → commands}/proposal.js +0 -0
  246. /package/dist/{src/commands → commands}/propose.js +0 -0
  247. /package/dist/{src/commands → commands}/reflect.js +0 -0
  248. /package/dist/{src/commands → commands}/registry-search.js +0 -0
  249. /package/dist/{src/commands → commands}/remember.js +0 -0
  250. /package/dist/{src/commands → commands}/search.js +0 -0
  251. /package/dist/{src/commands → commands}/self-update.js +0 -0
  252. /package/dist/{src/commands → commands}/show.js +0 -0
  253. /package/dist/{src/commands → commands}/source-clone.js +0 -0
  254. /package/dist/{src/commands → commands}/source-manage.js +0 -0
  255. /package/dist/{src/commands → commands}/vault.js +0 -0
  256. /package/dist/{src/core → core}/asset-ref.js +0 -0
  257. /package/dist/{src/core → core}/asset-registry.js +0 -0
  258. /package/dist/{src/core → core}/asset-spec.js +0 -0
  259. /package/dist/{src/core → core}/errors.js +0 -0
  260. /package/dist/{src/core → core}/events.js +0 -0
  261. /package/dist/{src/core → core}/frontmatter.js +0 -0
  262. /package/dist/{src/core → core}/lesson-lint.js +0 -0
  263. /package/dist/{src/core → core}/markdown.js +0 -0
  264. /package/dist/{src/core → core}/paths.js +0 -0
  265. /package/dist/{src/core → core}/proposals.js +0 -0
  266. /package/dist/{src/core → core}/warn.js +0 -0
  267. /package/dist/{src/core → core}/write-source.js +0 -0
  268. /package/dist/{src/indexer → indexer}/db.js +0 -0
  269. /package/dist/{src/indexer → indexer}/file-context.js +0 -0
  270. /package/dist/{src/indexer → indexer}/graph-boost.js +0 -0
  271. /package/dist/{src/indexer → indexer}/manifest.js +0 -0
  272. /package/dist/{src/indexer → indexer}/matchers.js +0 -0
  273. /package/dist/{src/indexer → indexer}/metadata.js +0 -0
  274. /package/dist/{src/indexer → indexer}/search-fields.js +0 -0
  275. /package/dist/{src/indexer → indexer}/semantic-status.js +0 -0
  276. /package/dist/{src/indexer → indexer}/usage-events.js +0 -0
  277. /package/dist/{src/indexer → indexer}/walker.js +0 -0
  278. /package/dist/{src/integrations → integrations}/agent/config.js +0 -0
  279. /package/dist/{src/integrations → integrations}/agent/detect.js +0 -0
  280. /package/dist/{src/integrations → integrations}/agent/index.js +0 -0
  281. /package/dist/{src/integrations → integrations}/agent/profiles.js +0 -0
  282. /package/dist/{src/integrations → integrations}/agent/prompts.js +0 -0
  283. /package/dist/{src/integrations → integrations}/agent/spawn.js +0 -0
  284. /package/dist/{src/integrations → integrations}/github.js +0 -0
  285. /package/dist/{src/integrations → integrations}/lockfile.js +0 -0
  286. /package/dist/{src/llm → llm}/embedders/cache.js +0 -0
  287. /package/dist/{src/llm → llm}/embedders/types.js +0 -0
  288. /package/dist/{src/llm → llm}/feature-gate.js +0 -0
  289. /package/dist/{src/llm → llm}/index-passes.js +0 -0
  290. /package/dist/{src/output → output}/context.js +0 -0
  291. /package/dist/{src/output → output}/renderers.js +0 -0
  292. /package/dist/{src/output → output}/shapes.js +0 -0
  293. /package/dist/{src/output → output}/text.js +0 -0
  294. /package/dist/{src/registry → registry}/build-index.js +0 -0
  295. /package/dist/{src/registry → registry}/create-provider-registry.js +0 -0
  296. /package/dist/{src/registry → registry}/factory.js +0 -0
  297. /package/dist/{src/registry → registry}/origin-resolve.js +0 -0
  298. /package/dist/{src/registry → registry}/providers/index.js +0 -0
  299. /package/dist/{src/registry → registry}/providers/skills-sh.js +0 -0
  300. /package/dist/{src/registry → registry}/providers/static-index.js +0 -0
  301. /package/dist/{src/registry → registry}/providers/types.js +0 -0
  302. /package/dist/{src/registry → registry}/resolve.js +0 -0
  303. /package/dist/{src/registry → registry}/types.js +0 -0
  304. /package/dist/{src/setup → setup}/detect.js +0 -0
  305. /package/dist/{src/setup → setup}/ripgrep-install.js +0 -0
  306. /package/dist/{src/setup → setup}/ripgrep-resolve.js +0 -0
  307. /package/dist/{src/setup → setup}/steps.js +0 -0
  308. /package/dist/{src/sources → sources}/include.js +0 -0
  309. /package/dist/{src/sources → sources}/provider-factory.js +0 -0
  310. /package/dist/{src/sources → sources}/provider.js +0 -0
  311. /package/dist/{src/sources → sources}/providers/filesystem.js +0 -0
  312. /package/dist/{src/sources → sources}/providers/git.js +0 -0
  313. /package/dist/{src/sources → sources}/providers/index.js +0 -0
  314. /package/dist/{src/sources → sources}/providers/install-types.js +0 -0
  315. /package/dist/{src/sources → sources}/providers/npm.js +0 -0
  316. /package/dist/{src/sources → sources}/providers/provider-utils.js +0 -0
  317. /package/dist/{src/sources → sources}/providers/sync-from-ref.js +0 -0
  318. /package/dist/{src/sources → sources}/providers/tar-utils.js +0 -0
  319. /package/dist/{src/sources → sources}/resolve.js +0 -0
  320. /package/dist/{src/sources → sources}/types.js +0 -0
  321. /package/dist/{src/templates → templates}/wiki-templates.js +0 -0
  322. /package/dist/{src/version.js → version.js} +0 -0
  323. /package/dist/{src/wiki → wiki}/wiki.js +0 -0
  324. /package/dist/{src/workflows → workflows}/authoring.js +0 -0
  325. /package/dist/{src/workflows → workflows}/cli.js +0 -0
  326. /package/dist/{src/workflows → workflows}/db.js +0 -0
  327. /package/dist/{src/workflows → workflows}/document-cache.js +0 -0
  328. /package/dist/{src/workflows → workflows}/parser.js +0 -0
  329. /package/dist/{src/workflows → workflows}/renderer.js +0 -0
  330. /package/dist/{src/workflows → workflows}/runs.js +0 -0
  331. /package/dist/{src/workflows → workflows}/schema.js +0 -0
  332. /package/dist/{src/workflows → workflows}/validator.js +0 -0
@@ -1,355 +0,0 @@
1
- /**
2
- * akm-bench run-config loader.
3
- *
4
- * A bench run config (`tests/bench/configs/*.json`) is a single-file
5
- * description of a utility/evolve invocation: providers, default model,
6
- * tasks, arms, seeds, budgets, parallel, baseline. Loading a config
7
- * resolves the providers file (from explicit `providers` / `providersRef`
8
- * fields, the `BENCH_OPENCODE_CONFIG` env var, or
9
- * `${XDG_CONFIG_HOME:-~/.config}/akm/bench-providers.json`), looks up the
10
- * effective default model, and resolves the task selector + baseline file
11
- * paths.
12
- *
13
- * Self-contained — does not import from `src/` so the bench framework
14
- * stays liftable to a standalone repo.
15
- */
16
- import fs from "node:fs";
17
- import os from "node:os";
18
- import path from "node:path";
19
- import { listTasks, loadTask } from "./corpus";
20
- import { BenchConfigError, loadOpencodeProviders, } from "./opencode-config";
21
- import { benchMkdtemp } from "./tmp";
22
/**
 * Resolve a path string supporting `~` expansion and `${VAR}` / `$VAR`
 * env-var expansion. Relative paths are resolved against `baseDir`.
 *
 * Env-var expansion happens in a single left-to-right pass, so text that
 * was substituted in from one variable is never re-scanned for further
 * `$NAME` references (a value such as `"$HOME"` stays literal). Variables
 * that are not set expand to the empty string.
 *
 * Tilde expansion covers `~` alone and `~/...`; `~user/` is not supported.
 *
 * @param {string} value   Raw path string from the config.
 * @param {string} baseDir Directory that relative results are resolved against.
 * @returns {string} `value` with variables and tilde expanded; an absolute
 *   result is returned as-is, a relative one is resolved against `baseDir`.
 */
export function resolvePathString(value, baseDir) {
    // One combined pattern handles both the braced and the bare form, so the
    // output of one substitution can never feed a second expansion.
    const expanded = value.replace(
        /\$\{([A-Za-z_][A-Za-z0-9_]*)\}|\$([A-Za-z_][A-Za-z0-9_]*)/g,
        (_match, braced, bare) => process.env[braced ?? bare] ?? "",
    );
    let candidate = expanded;
    if (expanded === "~") {
        candidate = os.homedir();
    }
    else if (expanded.startsWith("~/")) {
        candidate = path.join(os.homedir(), expanded.slice(2));
    }
    return path.isAbsolute(candidate) ? candidate : path.resolve(baseDir, candidate);
}
40
/**
 * Location of the per-operator providers file:
 * `${XDG_CONFIG_HOME:-~/.config}/akm/bench-providers.json`.
 *
 * An unset or empty `XDG_CONFIG_HOME` falls back to `~/.config`.
 *
 * @returns {string} Path to `bench-providers.json` under the chosen config root.
 */
export function defaultUserProvidersPath() {
    // Env values are strings, so `||` covers both "unset" and "empty".
    const configRoot = process.env.XDG_CONFIG_HOME || path.join(os.homedir(), ".config");
    return path.join(configRoot, "akm", "bench-providers.json");
}
46
/**
 * Locate the providers file via the discovery chain and load it.
 *
 * Order of precedence:
 *   1. `BENCH_OPENCODE_CONFIG` env var (a relative value is resolved against
 *      the current working directory).
 *   2. `providers` inline in the config (setting `providersRef` as well is
 *      an error).
 *   3. `providersRef` in the config (with tilde / env-var expansion, relative
 *      to `configDir`).
 *   4. `${XDG_CONFIG_HOME:-~/.config}/akm/bench-providers.json`, if it exists.
 *   5. Repo-local fallbacks under `../fixtures/bench/`: the
 *      `opencode-providers.local.json` overlay first, then the committed
 *      `opencode-providers.json` fixture.
 *   6. Otherwise throw a `BenchConfigError`.
 *
 * @param config    Parsed bench run config.
 * @param configDir Directory used to resolve a relative `providersRef`.
 * @returns The loaded providers, as produced by `loadOpencodeProviders`
 *   (or by `materialiseInlineProviders` for the inline case).
 */
export function resolveProviders(config, configDir) {
    // 1. The environment override beats everything in the config.
    const fromEnv = process.env.BENCH_OPENCODE_CONFIG;
    if (fromEnv && fromEnv.length > 0) {
        const envFile = path.isAbsolute(fromEnv) ? fromEnv : path.resolve(fromEnv);
        return loadOpencodeProviders(envFile);
    }
    const hasInline = config.providers !== undefined;
    const hasRef = config.providersRef !== undefined;
    // 2. Inline providers; they are mutually exclusive with providersRef.
    if (hasInline) {
        if (hasRef) {
            throw new BenchConfigError("bench run config: only one of `providers` or `providersRef` may be set", true);
        }
        return materialiseInlineProviders(config);
    }
    // 3. Explicit reference to a providers file.
    if (hasRef) {
        return loadOpencodeProviders(resolvePathString(config.providersRef, configDir));
    }
    // 4 + 5. On-disk fallbacks, probed in priority order. Each candidate is
    // computed lazily so later entries are only evaluated when earlier ones
    // are missing. The gitignored `.local.json` overlay is tried before the
    // committed fixture so local overrides survive a `git pull`.
    const userPath = defaultUserProvidersPath();
    const fallbacks = [
        () => userPath,
        () => path.resolve(__dirname, "..", "fixtures", "bench", "opencode-providers.local.json"),
        () => path.resolve(__dirname, "..", "fixtures", "bench", "opencode-providers.json"),
    ];
    for (const locate of fallbacks) {
        const candidate = locate();
        if (fs.existsSync(candidate)) {
            return loadOpencodeProviders(candidate);
        }
    }
    // 6. Nothing matched anywhere in the chain.
    throw new BenchConfigError(`bench run config: no opencode providers found. Set \`providers\` or \`providersRef\` in the config, set BENCH_OPENCODE_CONFIG, or create ${userPath}.`, true);
}
97
/**
 * Turn an inline `providers` map into loaded providers by round-tripping it
 * through `loadOpencodeProviders`, so the inline form goes through the same
 * loader as an on-disk providers file.
 *
 * The map is wrapped in a `{ schemaVersion: 1, providers, defaultModel? }`
 * document, written to a short-lived file (mode 0o600) inside a bench temp
 * directory, loaded, and the directory is then removed on a best-effort
 * basis. The returned object has its `source` overridden to `"<inline>"`.
 *
 * @param config Parsed bench run config with `providers` set.
 * @throws BenchConfigError when `providers` is not a plain object.
 */
function materialiseInlineProviders(config) {
    const inline = config.providers;
    if (inline === null || typeof inline !== "object" || Array.isArray(inline)) {
        throw new BenchConfigError("bench run config: `providers` must be an object", false);
    }
    const payload = { schemaVersion: 1, providers: inline };
    if (config.defaultModel !== undefined) {
        payload.defaultModel = config.defaultModel;
    }
    // Per #276: bench tmp dirs MUST live under `${AKM_CACHE_DIR}/bench/`,
    // never the OS-default tmp root, hence `benchMkdtemp`.
    const scratchDir = benchMkdtemp("akm-bench-inline-");
    const scratchFile = path.join(scratchDir, "providers.json");
    try {
        fs.writeFileSync(scratchFile, JSON.stringify(payload), { mode: 0o600 });
        return { ...loadOpencodeProviders(scratchFile), source: "<inline>" };
    }
    finally {
        try {
            fs.rmSync(scratchDir, { recursive: true, force: true });
        }
        catch {
            // Cleanup is best-effort; a failure here must not mask the result.
        }
    }
}
138
/**
 * Read and validate a baseline JSON file of the form
 * `{ taskId: passRate }`, where every pass rate is a finite number in [0, 1].
 *
 * @param {string} absPath Path of the baseline file to read.
 * @returns {Object<string, number>} A fresh object holding the validated entries.
 * @throws BenchConfigError when the file cannot be read, is not valid JSON,
 *   is not a JSON object, or contains an out-of-range / non-numeric entry.
 */
export function loadBaseline(absPath) {
    const reason = (err) => (err instanceof Error ? err.message : String(err));
    let text;
    try {
        text = fs.readFileSync(absPath, "utf8");
    }
    catch (err) {
        throw new BenchConfigError(`bench run config: cannot read baseline file "${absPath}": ${reason(err)}`, true);
    }
    let document;
    try {
        document = JSON.parse(text);
    }
    catch (err) {
        throw new BenchConfigError(`bench run config: baseline file "${absPath}" is not valid JSON: ${reason(err)}`, false);
    }
    const isPlainObject = document !== null && typeof document === "object" && !Array.isArray(document);
    if (!isPlainObject) {
        throw new BenchConfigError(`bench run config: baseline file "${absPath}" must be a JSON object of taskId → passRate`, false);
    }
    const baseline = {};
    Object.entries(document).forEach(([taskId, passRate]) => {
        const inRange = typeof passRate === "number" && Number.isFinite(passRate) && passRate >= 0 && passRate <= 1;
        if (!inRange) {
            throw new BenchConfigError(`bench run config: baseline entry ${JSON.stringify(taskId)} in "${absPath}" must be a number in [0, 1]; got ${JSON.stringify(passRate)}`, false);
        }
        baseline[taskId] = passRate;
    });
    return baseline;
}
166
/**
 * Expand the config's `tasks` selector into concrete task metadata plus the
 * slice label used for the report's `corpus.slice` field.
 *
 * Accepted selectors:
 *   - `undefined`                  → every task, slice `"all"`.
 *   - `"all"` / `"train"` / `"eval"` → tasks of that slice, slice = selector.
 *   - a string containing `/`      → a single task id, slice `"all"`.
 *   - any other string             → all tasks whose `domain` equals it.
 *   - a non-empty list of task ids → those tasks in order, slice `"all"`.
 *
 * @param selector The `tasks` value from the run config (may be undefined).
 * @returns `{ tasks, slice }`.
 * @throws BenchConfigError when nothing matches or the list is empty.
 */
export function resolveTasks(selector) {
    // Omitted field means "all".
    if (selector === undefined) {
        return { tasks: listTasks(), slice: "all" };
    }
    // Non-string selectors are treated as a list of task ids.
    if (typeof selector !== "string") {
        if (selector.length === 0) {
            throw new BenchConfigError("bench run config: tasks: array must be non-empty", true);
        }
        const picked = Array.from(selector, (id) => {
            try {
                return loadTask(id);
            }
            catch {
                throw new BenchConfigError(`bench run config: tasks: no task matched "${id}"`, true);
            }
        });
        return { tasks: picked, slice: "all" };
    }
    // Named slices.
    switch (selector) {
        case "all":
            return { tasks: listTasks({}), slice: selector };
        case "train":
        case "eval":
            return { tasks: listTasks({ slice: selector }), slice: selector };
        default:
            break;
    }
    // A slash marks a full task id ("domain/name").
    if (selector.includes("/")) {
        let single;
        try {
            single = loadTask(selector);
        }
        catch {
            throw new BenchConfigError(`bench run config: tasks: no task matched "${selector}"`, true);
        }
        return { tasks: [single], slice: "all" };
    }
    // Anything else is a domain name.
    const everyTask = listTasks();
    const inDomain = everyTask.filter((task) => task.domain === selector);
    if (inDomain.length > 0) {
        return { tasks: inDomain, slice: "all" };
    }
    const knownDomains = [...new Set(everyTask.map((task) => task.domain))].sort().join(", ") || "(none)";
    throw new BenchConfigError(`bench run config: tasks: no task matched domain "${selector}". Available domains: ${knownDomains}`, true);
}
214
/**
 * Check a parsed run config against the v1 schema using hand-written
 * rules (no JSON Schema runtime). Validation stops at the first problem.
 *
 * Checks, in order: root is a plain object; `schemaVersion` is 1; no unknown
 * top-level fields; `providers` and `providersRef` are not both set; `tasks`
 * is a string or an array of strings; `arms` is a non-empty array of
 * "noakm" / "akm" / "synthetic"; `seeds`, `budgetTokens`, `budgetWallMs`
 * and `parallel` are positive integers when present.
 *
 * @param parsed Result of `JSON.parse` on the config file.
 * @param source Label for the config (used in error messages).
 * @returns The same object that was passed in, once it has passed all checks.
 * @throws BenchConfigError on the first violation.
 */
function validateConfig(parsed, source) {
    const rootIsObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    if (!rootIsObject) {
        throw new BenchConfigError(`bench run config: root of ${source} must be a JSON object`, false);
    }
    const cfg = parsed;
    if (cfg.schemaVersion !== 1) {
        throw new BenchConfigError(`bench run config: ${source}: unsupported schemaVersion ${JSON.stringify(cfg.schemaVersion)}; expected 1`, false);
    }
    const knownFields = new Set([
        "$schema",
        "schemaVersion",
        "name",
        "description",
        "providers",
        "providersRef",
        "defaultModel",
        "tasks",
        "arms",
        "seeds",
        "budgetTokens",
        "budgetWallMs",
        "parallel",
        "forceParallel",
        "baseline",
    ]);
    const stray = Object.keys(cfg).find((field) => !knownFields.has(field));
    if (stray !== undefined) {
        throw new BenchConfigError(`bench run config: ${source}: unknown field "${stray}"`, false);
    }
    if (cfg.providers !== undefined && cfg.providersRef !== undefined) {
        throw new BenchConfigError(`bench run config: ${source}: only one of "providers" or "providersRef" may be set`, true);
    }
    if (cfg.tasks !== undefined) {
        const tasksIsList = Array.isArray(cfg.tasks);
        if (typeof cfg.tasks !== "string" && !tasksIsList) {
            throw new BenchConfigError(`bench run config: ${source}: "tasks" must be a string or array of strings`, false);
        }
        if (tasksIsList) {
            for (let i = 0; i < cfg.tasks.length; i += 1) {
                if (typeof cfg.tasks[i] !== "string") {
                    throw new BenchConfigError(`bench run config: ${source}: every entry in "tasks" must be a string`, false);
                }
            }
        }
    }
    if (cfg.arms !== undefined) {
        if (!Array.isArray(cfg.arms) || cfg.arms.length === 0) {
            throw new BenchConfigError(`bench run config: ${source}: "arms" must be a non-empty array`, false);
        }
        const validArms = ["noakm", "akm", "synthetic"];
        for (let i = 0; i < cfg.arms.length; i += 1) {
            const arm = cfg.arms[i];
            if (!validArms.includes(arm)) {
                throw new BenchConfigError(`bench run config: ${source}: invalid arm ${JSON.stringify(arm)}; expected one of "noakm", "akm", "synthetic"`, false);
            }
        }
    }
    ["seeds", "budgetTokens", "budgetWallMs", "parallel"].forEach((numField) => {
        const amount = cfg[numField];
        if (amount === undefined) {
            return;
        }
        const isPositiveInt = typeof amount === "number" && Number.isInteger(amount) && amount >= 1;
        if (!isPositiveInt) {
            throw new BenchConfigError(`bench run config: ${source}: "${numField}" must be a positive integer`, false);
        }
    });
    return cfg;
}
284
- /**
285
- * Load and resolve a bench run config from disk.
286
- *
287
- * @param configPath Absolute or relative path to the config JSON file.
288
- * @param overrides CLI-derived overrides applied on top of the config.
289
- */
290
- export function loadBenchRunConfig(configPath, overrides = {}) {
291
- const absPath = path.isAbsolute(configPath) ? configPath : path.resolve(configPath);
292
- if (!fs.existsSync(absPath)) {
293
- throw new BenchConfigError(`bench run config: file not found: ${absPath}`, true);
294
- }
295
- let raw;
296
- try {
297
- raw = fs.readFileSync(absPath, "utf8");
298
- }
299
- catch (err) {
300
- throw new BenchConfigError(`bench run config: cannot read ${absPath}: ${err instanceof Error ? err.message : String(err)}`, true);
301
- }
302
- let parsed;
303
- try {
304
- parsed = JSON.parse(raw);
305
- }
306
- catch (err) {
307
- throw new BenchConfigError(`bench run config: ${absPath}: invalid JSON: ${err instanceof Error ? err.message : String(err)}`, false);
308
- }
309
- const config = validateConfig(parsed, absPath);
310
- const configDir = path.dirname(absPath);
311
- const providers = resolveProviders(config, configDir);
312
- const envModel = process.env.BENCH_OPENCODE_MODEL;
313
- const model = (envModel && envModel.length > 0 ? envModel : undefined) ?? config.defaultModel ?? providers.defaultModel;
314
- if (!model) {
315
- throw new BenchConfigError(`bench run config: ${absPath}: no model specified. Set "defaultModel" in the config, set "defaultModel" in the providers file, or set BENCH_OPENCODE_MODEL.`, true);
316
- }
317
- // Resolve tasks (with optional CLI list override).
318
- let resolved = resolveTasks(config.tasks);
319
- if (overrides.tasksList && overrides.tasksList.length > 0) {
320
- const set = new Set(overrides.tasksList);
321
- const filtered = resolved.tasks.filter((t) => set.has(t.id));
322
- const missing = overrides.tasksList.filter((id) => !resolved.tasks.some((t) => t.id === id));
323
- if (missing.length > 0) {
324
- throw new BenchConfigError(`bench run config: --tasks override: no task in the config matched ${JSON.stringify(missing.join(", "))}`, true);
325
- }
326
- resolved = { tasks: filtered, slice: resolved.slice };
327
- }
328
- if (resolved.tasks.length === 0) {
329
- throw new BenchConfigError(`bench run config: ${absPath}: task selector matched zero tasks`, true);
330
- }
331
- let baselineByTaskId;
332
- if (config.baseline) {
333
- const baselinePath = resolvePathString(config.baseline, configDir);
334
- baselineByTaskId = loadBaseline(baselinePath);
335
- }
336
- const arms = config.arms ?? ["noakm", "akm"];
337
- const seedsPerArm = overrides.seedsPerArm ?? config.seeds;
338
- const parallel = overrides.parallel ?? config.parallel;
339
- const name = config.name ?? path.basename(absPath, path.extname(absPath));
340
- return {
341
- source: absPath,
342
- name,
343
- providers,
344
- model,
345
- tasks: resolved.tasks,
346
- arms,
347
- ...(seedsPerArm !== undefined ? { seedsPerArm } : {}),
348
- ...(config.budgetTokens !== undefined ? { budgetTokens: config.budgetTokens } : {}),
349
- ...(config.budgetWallMs !== undefined ? { budgetWallMs: config.budgetWallMs } : {}),
350
- ...(parallel !== undefined ? { parallel } : {}),
351
- ...(config.forceParallel ? { forceParallel: true } : {}),
352
- ...(baselineByTaskId ? { baselineByTaskId } : {}),
353
- slice: resolved.slice,
354
- };
355
- }
@@ -1,298 +0,0 @@
1
- /**
2
- * Unit tests for the bench run-config loader (`tests/bench/run-config.ts`).
3
- *
4
- * Covers the parts that don't require spawning a process:
5
- * - Schema validation (unknown fields, missing schemaVersion, bad arms).
6
- * - Path resolution (~ expansion, ${VAR} expansion, relative vs absolute).
7
- * - Provider discovery chain (env > inline > providersRef > XDG default).
8
- * - Baseline-file loading + range checks.
9
- * - Task selector resolution (slice / domain / id / array).
10
- *
11
- * The CLI-level dispatch is exercised by `cli.test.ts` via spawned bench
12
- * runs — keep those for end-to-end coverage; this file is unit-grade.
13
- */
14
- import { afterEach, beforeEach, describe, expect, test } from "bun:test";
15
- import fs from "node:fs";
16
- import os from "node:os";
17
- import path from "node:path";
18
- import { defaultUserProvidersPath, loadBaseline, loadBenchRunConfig, resolvePathString } from "./run-config";
19
- import { benchMkdtemp } from "./tmp";
20
- const REPO_ROOT = path.resolve(__dirname, "..", "..");
21
- let workDir;
22
- let savedEnv;
23
- beforeEach(() => {
24
- // Per #276 invariant: bench tmp dirs live under `${AKM_CACHE_DIR}/bench/`,
25
- // never the OS-default tmp root. `benchMkdtemp` is the drop-in.
26
- workDir = benchMkdtemp("akm-bench-runconfig-test-");
27
- savedEnv = {
28
- BENCH_OPENCODE_CONFIG: process.env.BENCH_OPENCODE_CONFIG,
29
- BENCH_OPENCODE_MODEL: process.env.BENCH_OPENCODE_MODEL,
30
- AKM_TEST_VAR: process.env.AKM_TEST_VAR,
31
- };
32
- delete process.env.BENCH_OPENCODE_CONFIG;
33
- delete process.env.BENCH_OPENCODE_MODEL;
34
- });
35
- afterEach(() => {
36
- fs.rmSync(workDir, { recursive: true, force: true });
37
- for (const [k, v] of Object.entries(savedEnv)) {
38
- if (v === undefined)
39
- delete process.env[k];
40
- else
41
- process.env[k] = v;
42
- }
43
- });
44
- function writeProvidersFile(filePath, defaultModel = "p/m") {
45
- fs.mkdirSync(path.dirname(filePath), { recursive: true });
46
- fs.writeFileSync(filePath, JSON.stringify({
47
- schemaVersion: 1,
48
- defaultModel,
49
- providers: { p: { npm: "@ai-sdk/openai-compatible" } },
50
- }));
51
- }
52
- describe("resolvePathString", () => {
53
- test("resolves a relative path against the supplied base dir", () => {
54
- expect(resolvePathString("foo.json", "/work")).toBe("/work/foo.json");
55
- });
56
- test("returns absolute paths unchanged", () => {
57
- expect(resolvePathString("/abs/path.json", "/work")).toBe("/abs/path.json");
58
- });
59
- test("expands `~` to the operator's home dir", () => {
60
- expect(resolvePathString("~/.config/akm/foo.json", "/work")).toBe(path.join(os.homedir(), ".config/akm/foo.json"));
61
- });
62
- test("expands env-var references", () => {
63
- // Build the input with concatenation rather than a string literal to avoid
64
- // biome's noTemplateCurlyInString flag on the `\${VAR}` form.
65
- process.env.AKM_TEST_VAR = "/somewhere";
66
- const input = `${"$"}{AKM_TEST_VAR}/providers.json`;
67
- expect(resolvePathString(input, "/work")).toBe("/somewhere/providers.json");
68
- });
69
- });
70
- describe("defaultUserProvidersPath", () => {
71
- test("respects XDG_CONFIG_HOME when set", () => {
72
- const saved = process.env.XDG_CONFIG_HOME;
73
- process.env.XDG_CONFIG_HOME = "/xdg-test";
74
- try {
75
- expect(defaultUserProvidersPath()).toBe("/xdg-test/akm/bench-providers.json");
76
- }
77
- finally {
78
- if (saved === undefined)
79
- delete process.env.XDG_CONFIG_HOME;
80
- else
81
- process.env.XDG_CONFIG_HOME = saved;
82
- }
83
- });
84
- test("falls back to ~/.config when XDG_CONFIG_HOME is unset", () => {
85
- const saved = process.env.XDG_CONFIG_HOME;
86
- delete process.env.XDG_CONFIG_HOME;
87
- try {
88
- expect(defaultUserProvidersPath()).toBe(path.join(os.homedir(), ".config/akm/bench-providers.json"));
89
- }
90
- finally {
91
- if (saved !== undefined)
92
- process.env.XDG_CONFIG_HOME = saved;
93
- }
94
- });
95
- });
96
- describe("loadBaseline", () => {
97
- test("loads a `{ taskId: passRate }` map", () => {
98
- const filePath = path.join(workDir, "baseline.json");
99
- fs.writeFileSync(filePath, JSON.stringify({ "domain/a": 0.8, "domain/b": 1.0 }));
100
- expect(loadBaseline(filePath)).toEqual({ "domain/a": 0.8, "domain/b": 1.0 });
101
- });
102
- test("rejects pass rates outside [0, 1]", () => {
103
- const filePath = path.join(workDir, "bad.json");
104
- fs.writeFileSync(filePath, JSON.stringify({ "x/y": 1.5 }));
105
- expect(() => loadBaseline(filePath)).toThrow(/must be a number in \[0, 1\]/);
106
- });
107
- test("rejects non-number values", () => {
108
- const filePath = path.join(workDir, "non-number.json");
109
- fs.writeFileSync(filePath, JSON.stringify({ "x/y": "not a number" }));
110
- expect(() => loadBaseline(filePath)).toThrow(/must be a number/);
111
- });
112
- });
113
- describe("loadBenchRunConfig — schema validation", () => {
114
- test("rejects unknown top-level fields", () => {
115
- const cfgPath = path.join(workDir, "bad.json");
116
- fs.writeFileSync(cfgPath, JSON.stringify({ schemaVersion: 1, name: "x", weirdField: 42 }));
117
- expect(() => loadBenchRunConfig(cfgPath)).toThrow(/unknown field "weirdField"/);
118
- });
119
- test("rejects missing schemaVersion", () => {
120
- const cfgPath = path.join(workDir, "noversion.json");
121
- fs.writeFileSync(cfgPath, JSON.stringify({ name: "x" }));
122
- expect(() => loadBenchRunConfig(cfgPath)).toThrow(/unsupported schemaVersion/);
123
- });
124
- test("rejects providers AND providersRef both set", () => {
125
- const cfgPath = path.join(workDir, "both.json");
126
- fs.writeFileSync(cfgPath, JSON.stringify({
127
- schemaVersion: 1,
128
- providers: { p: { npm: "x" } },
129
- providersRef: "./other.json",
130
- }));
131
- expect(() => loadBenchRunConfig(cfgPath)).toThrow(/only one of "providers" or "providersRef"/);
132
- });
133
- test("rejects bad arm values", () => {
134
- const cfgPath = path.join(workDir, "badarm.json");
135
- fs.writeFileSync(cfgPath, JSON.stringify({ schemaVersion: 1, arms: ["nope"] }));
136
- expect(() => loadBenchRunConfig(cfgPath)).toThrow(/invalid arm/);
137
- });
138
- test("missing config file exits with usage error", () => {
139
- expect(() => loadBenchRunConfig(path.join(workDir, "ghost.json"))).toThrow(/file not found/);
140
- });
141
- });
142
- describe("loadBenchRunConfig — provider discovery", () => {
143
- test("BENCH_OPENCODE_CONFIG env var wins over providersRef", () => {
144
- const envProviders = path.join(workDir, "env-providers.json");
145
- const refProviders = path.join(workDir, "ref-providers.json");
146
- writeProvidersFile(envProviders, "env/model");
147
- writeProvidersFile(refProviders, "ref/model");
148
- process.env.BENCH_OPENCODE_CONFIG = envProviders;
149
- const cfgPath = path.join(workDir, "config.json");
150
- fs.writeFileSync(cfgPath, JSON.stringify({
151
- schemaVersion: 1,
152
- providersRef: "./ref-providers.json",
153
- tasks: "all",
154
- }));
155
- // No tasks resolved so we can't actually load — just verify provider
156
- // resolution. We restrict to a single committed task to satisfy the
157
- // selector. The bench corpus exists at fixtures/bench/tasks; we use
158
- // "all" as the selector and skip past the `tasks=0` exit by writing a
159
- // selector that matches a real task.
160
- fs.writeFileSync(cfgPath, JSON.stringify({
161
- schemaVersion: 1,
162
- providersRef: "./ref-providers.json",
163
- tasks: ["drillbit/backup-policy"],
164
- }));
165
- const resolved = loadBenchRunConfig(cfgPath);
166
- expect(resolved.providers.source).toBe(envProviders);
167
- expect(resolved.model).toBe("env/model");
168
- });
169
- test("`providersRef` is resolved relative to the config file", () => {
170
- const refProviders = path.join(workDir, "subdir", "providers.json");
171
- writeProvidersFile(refProviders, "ref/model");
172
- const cfgPath = path.join(workDir, "config.json");
173
- fs.writeFileSync(cfgPath, JSON.stringify({
174
- schemaVersion: 1,
175
- providersRef: "./subdir/providers.json",
176
- tasks: ["drillbit/backup-policy"],
177
- }));
178
- const resolved = loadBenchRunConfig(cfgPath);
179
- expect(resolved.providers.source).toBe(refProviders);
180
- expect(resolved.model).toBe("ref/model");
181
- });
182
- test("config `defaultModel` overrides the providers file's defaultModel", () => {
183
- const refProviders = path.join(workDir, "providers.json");
184
- writeProvidersFile(refProviders, "ref/model");
185
- const cfgPath = path.join(workDir, "config.json");
186
- fs.writeFileSync(cfgPath, JSON.stringify({
187
- schemaVersion: 1,
188
- providersRef: "./providers.json",
189
- defaultModel: "config/model",
190
- tasks: ["drillbit/backup-policy"],
191
- }));
192
- const resolved = loadBenchRunConfig(cfgPath);
193
- expect(resolved.model).toBe("config/model");
194
- });
195
- test("BENCH_OPENCODE_MODEL env wins over both", () => {
196
- const refProviders = path.join(workDir, "providers.json");
197
- writeProvidersFile(refProviders, "ref/model");
198
- process.env.BENCH_OPENCODE_MODEL = "env/model";
199
- const cfgPath = path.join(workDir, "config.json");
200
- fs.writeFileSync(cfgPath, JSON.stringify({
201
- schemaVersion: 1,
202
- providersRef: "./providers.json",
203
- defaultModel: "config/model",
204
- tasks: ["drillbit/backup-policy"],
205
- }));
206
- const resolved = loadBenchRunConfig(cfgPath);
207
- expect(resolved.model).toBe("env/model");
208
- });
209
- });
210
- describe("loadBenchRunConfig — task resolution", () => {
211
- test("tasks=array selects exactly the listed ids", () => {
212
- const refProviders = path.join(workDir, "providers.json");
213
- writeProvidersFile(refProviders);
214
- const cfgPath = path.join(workDir, "config.json");
215
- fs.writeFileSync(cfgPath, JSON.stringify({
216
- schemaVersion: 1,
217
- providersRef: "./providers.json",
218
- tasks: ["drillbit/backup-policy", "drillbit/canary-enable"],
219
- }));
220
- const resolved = loadBenchRunConfig(cfgPath);
221
- expect(resolved.tasks.map((t) => t.id).sort()).toEqual(["drillbit/backup-policy", "drillbit/canary-enable"]);
222
- });
223
- test("tasks=domain matches every task whose domain matches", () => {
224
- const refProviders = path.join(workDir, "providers.json");
225
- writeProvidersFile(refProviders);
226
- const cfgPath = path.join(workDir, "config.json");
227
- fs.writeFileSync(cfgPath, JSON.stringify({
228
- schemaVersion: 1,
229
- providersRef: "./providers.json",
230
- tasks: "drillbit",
231
- }));
232
- const resolved = loadBenchRunConfig(cfgPath);
233
- expect(resolved.tasks.length).toBeGreaterThan(0);
234
- for (const t of resolved.tasks)
235
- expect(t.domain).toBe("drillbit");
236
- });
237
- test("tasks=single-id matches exactly that task", () => {
238
- const refProviders = path.join(workDir, "providers.json");
239
- writeProvidersFile(refProviders);
240
- const cfgPath = path.join(workDir, "config.json");
241
- fs.writeFileSync(cfgPath, JSON.stringify({
242
- schemaVersion: 1,
243
- providersRef: "./providers.json",
244
- tasks: "drillbit/backup-policy",
245
- }));
246
- const resolved = loadBenchRunConfig(cfgPath);
247
- expect(resolved.tasks.map((t) => t.id)).toEqual(["drillbit/backup-policy"]);
248
- });
249
- test("--tasks override (CLI) restricts to a subset of the config's selection", () => {
250
- const refProviders = path.join(workDir, "providers.json");
251
- writeProvidersFile(refProviders);
252
- const cfgPath = path.join(workDir, "config.json");
253
- fs.writeFileSync(cfgPath, JSON.stringify({
254
- schemaVersion: 1,
255
- providersRef: "./providers.json",
256
- tasks: ["drillbit/backup-policy", "drillbit/canary-enable"],
257
- }));
258
- const resolved = loadBenchRunConfig(cfgPath, { tasksList: ["drillbit/canary-enable"] });
259
- expect(resolved.tasks.map((t) => t.id)).toEqual(["drillbit/canary-enable"]);
260
- });
261
- test("baseline path is resolved relative to the config file", () => {
262
- const refProviders = path.join(workDir, "providers.json");
263
- writeProvidersFile(refProviders);
264
- const baselinePath = path.join(workDir, "baseline.json");
265
- fs.writeFileSync(baselinePath, JSON.stringify({ "drillbit/backup-policy": 0.8 }));
266
- const cfgPath = path.join(workDir, "config.json");
267
- fs.writeFileSync(cfgPath, JSON.stringify({
268
- schemaVersion: 1,
269
- providersRef: "./providers.json",
270
- tasks: ["drillbit/backup-policy"],
271
- baseline: "./baseline.json",
272
- }));
273
- const resolved = loadBenchRunConfig(cfgPath);
274
- expect(resolved.baselineByTaskId).toEqual({ "drillbit/backup-policy": 0.8 });
275
- });
276
- });
277
- describe("loadBenchRunConfig — committed configs validate", () => {
278
- test("tests/bench/configs/nano-quick.json loads cleanly", () => {
279
- const cfgPath = path.join(REPO_ROOT, "tests", "bench", "configs", "nano-quick.json");
280
- const resolved = loadBenchRunConfig(cfgPath);
281
- expect(resolved.name).toBe("nano-quick");
282
- expect(resolved.arms).toEqual(["akm"]);
283
- expect(resolved.seedsPerArm).toBe(2);
284
- expect(resolved.tasks.length).toBe(5);
285
- });
286
- test("tests/bench/configs/full.json loads cleanly and carries the baseline", () => {
287
- const cfgPath = path.join(REPO_ROOT, "tests", "bench", "configs", "full.json");
288
- const resolved = loadBenchRunConfig(cfgPath);
289
- expect(resolved.name).toBe("full");
290
- expect(resolved.baselineByTaskId).toBeDefined();
291
- expect(typeof resolved.baselineByTaskId?.["drillbit/backup-policy"]).toBe("number");
292
- });
293
- test("tests/bench/configs/curate-test.json restricts to one task", () => {
294
- const cfgPath = path.join(REPO_ROOT, "tests", "bench", "configs", "curate-test.json");
295
- const resolved = loadBenchRunConfig(cfgPath);
296
- expect(resolved.tasks.map((t) => t.id)).toEqual(["inkwell/configure-scaling"]);
297
- });
298
- });