akm-cli 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (327) hide show
  1. package/package.json +8 -8
  2. package/dist/tests/add-website-source.test.js +0 -119
  3. package/dist/tests/agent/agent-config-loader.test.js +0 -70
  4. package/dist/tests/agent/agent-config.test.js +0 -221
  5. package/dist/tests/agent/agent-detect.test.js +0 -100
  6. package/dist/tests/agent/agent-spawn.test.js +0 -234
  7. package/dist/tests/agent-output.test.js +0 -186
  8. package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +0 -103
  9. package/dist/tests/architecture/agent-spawn-seam.test.js +0 -193
  10. package/dist/tests/architecture/llm-stateless-seam.test.js +0 -112
  11. package/dist/tests/asset-ref.test.js +0 -192
  12. package/dist/tests/asset-registry.test.js +0 -103
  13. package/dist/tests/asset-spec.test.js +0 -241
  14. package/dist/tests/bench/attribution.test.js +0 -996
  15. package/dist/tests/bench/cleanup-sigint.test.js +0 -83
  16. package/dist/tests/bench/cleanup.js +0 -234
  17. package/dist/tests/bench/cleanup.test.js +0 -166
  18. package/dist/tests/bench/cli.js +0 -1018
  19. package/dist/tests/bench/cli.test.js +0 -445
  20. package/dist/tests/bench/compare.test.js +0 -556
  21. package/dist/tests/bench/corpus.js +0 -317
  22. package/dist/tests/bench/corpus.test.js +0 -258
  23. package/dist/tests/bench/doctor.js +0 -525
  24. package/dist/tests/bench/driver.js +0 -401
  25. package/dist/tests/bench/driver.test.js +0 -584
  26. package/dist/tests/bench/environment.js +0 -233
  27. package/dist/tests/bench/environment.test.js +0 -199
  28. package/dist/tests/bench/evolve-metrics.js +0 -179
  29. package/dist/tests/bench/evolve-metrics.test.js +0 -187
  30. package/dist/tests/bench/evolve.js +0 -647
  31. package/dist/tests/bench/evolve.test.js +0 -624
  32. package/dist/tests/bench/failure-modes.test.js +0 -349
  33. package/dist/tests/bench/feedback-integrity.test.js +0 -457
  34. package/dist/tests/bench/leakage.test.js +0 -228
  35. package/dist/tests/bench/learning-curve.test.js +0 -134
  36. package/dist/tests/bench/metrics.js +0 -2395
  37. package/dist/tests/bench/metrics.test.js +0 -1150
  38. package/dist/tests/bench/no-os-tmpdir-invariant.test.js +0 -43
  39. package/dist/tests/bench/opencode-config.js +0 -194
  40. package/dist/tests/bench/opencode-config.test.js +0 -370
  41. package/dist/tests/bench/report.js +0 -1885
  42. package/dist/tests/bench/report.test.js +0 -1038
  43. package/dist/tests/bench/run-config.js +0 -355
  44. package/dist/tests/bench/run-config.test.js +0 -298
  45. package/dist/tests/bench/run-curate-test.js +0 -32
  46. package/dist/tests/bench/run-failing-tasks.js +0 -56
  47. package/dist/tests/bench/run-full-bench.js +0 -51
  48. package/dist/tests/bench/run-items36-targeted.js +0 -69
  49. package/dist/tests/bench/run-nano-quick.js +0 -42
  50. package/dist/tests/bench/run-waveg-targeted.js +0 -62
  51. package/dist/tests/bench/runner.js +0 -699
  52. package/dist/tests/bench/runner.test.js +0 -958
  53. package/dist/tests/bench/search-bridge.test.js +0 -331
  54. package/dist/tests/bench/tmp.js +0 -131
  55. package/dist/tests/bench/trajectory.js +0 -116
  56. package/dist/tests/bench/trajectory.test.js +0 -127
  57. package/dist/tests/bench/verifier.js +0 -114
  58. package/dist/tests/bench/verifier.test.js +0 -118
  59. package/dist/tests/bench/workflow-evaluator.js +0 -557
  60. package/dist/tests/bench/workflow-evaluator.test.js +0 -421
  61. package/dist/tests/bench/workflow-spec.js +0 -345
  62. package/dist/tests/bench/workflow-spec.test.js +0 -363
  63. package/dist/tests/bench/workflow-trace.js +0 -472
  64. package/dist/tests/bench/workflow-trace.test.js +0 -254
  65. package/dist/tests/benchmark-search-quality.js +0 -536
  66. package/dist/tests/benchmark-suite.js +0 -1441
  67. package/dist/tests/capture-cli.test.js +0 -112
  68. package/dist/tests/cli-errors.test.js +0 -204
  69. package/dist/tests/commands/events.test.js +0 -370
  70. package/dist/tests/commands/history.test.js +0 -418
  71. package/dist/tests/commands/import.test.js +0 -103
  72. package/dist/tests/commands/proposal-cli.test.js +0 -209
  73. package/dist/tests/commands/reflect-propose-cli.test.js +0 -333
  74. package/dist/tests/commands/remember.test.js +0 -97
  75. package/dist/tests/commands/scope-flags.test.js +0 -300
  76. package/dist/tests/commands/search.test.js +0 -537
  77. package/dist/tests/commands/show-indexer-parity.test.js +0 -117
  78. package/dist/tests/commands/show.test.js +0 -294
  79. package/dist/tests/common.test.js +0 -266
  80. package/dist/tests/completions.test.js +0 -142
  81. package/dist/tests/config-cli.test.js +0 -193
  82. package/dist/tests/config-llm-features.test.js +0 -139
  83. package/dist/tests/config.test.js +0 -569
  84. package/dist/tests/contracts/migration-baseline.test.js +0 -43
  85. package/dist/tests/contracts/reflect-propose-envelope.test.js +0 -139
  86. package/dist/tests/contracts/spec-helpers.js +0 -46
  87. package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +0 -228
  88. package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +0 -56
  89. package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +0 -34
  90. package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +0 -94
  91. package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +0 -39
  92. package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +0 -44
  93. package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +0 -47
  94. package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +0 -40
  95. package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +0 -58
  96. package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +0 -34
  97. package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +0 -75
  98. package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +0 -36
  99. package/dist/tests/core/write-source.test.js +0 -366
  100. package/dist/tests/curate-command.test.js +0 -87
  101. package/dist/tests/db-scoring.test.js +0 -201
  102. package/dist/tests/db.test.js +0 -654
  103. package/dist/tests/distill-cli-flag.test.js +0 -208
  104. package/dist/tests/distill.test.js +0 -515
  105. package/dist/tests/docker-install.test.js +0 -120
  106. package/dist/tests/e2e.test.js +0 -1419
  107. package/dist/tests/embedder.test.js +0 -340
  108. package/dist/tests/embedding-model-config.test.js +0 -379
  109. package/dist/tests/feedback-command.test.js +0 -172
  110. package/dist/tests/file-context.test.js +0 -552
  111. package/dist/tests/fixtures/scripts/git/summarize-diff.js +0 -9
  112. package/dist/tests/fixtures/scripts/lint/eslint-check.js +0 -7
  113. package/dist/tests/fixtures/stashes/load.js +0 -166
  114. package/dist/tests/fixtures/stashes/load.test.js +0 -97
  115. package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +0 -12
  116. package/dist/tests/frontmatter.test.js +0 -190
  117. package/dist/tests/fts-field-weighting.test.js +0 -254
  118. package/dist/tests/fuzzy-search.test.js +0 -230
  119. package/dist/tests/git-provider-clone.test.js +0 -45
  120. package/dist/tests/github.test.js +0 -161
  121. package/dist/tests/graph-boost-ranking.test.js +0 -305
  122. package/dist/tests/graph-extraction.test.js +0 -282
  123. package/dist/tests/helpers/usage-events.js +0 -8
  124. package/dist/tests/index-pass-llm.test.js +0 -161
  125. package/dist/tests/indexer.test.js +0 -570
  126. package/dist/tests/info-command.test.js +0 -166
  127. package/dist/tests/init.test.js +0 -69
  128. package/dist/tests/install-script.test.js +0 -246
  129. package/dist/tests/integration/agent-real-profile.test.js +0 -94
  130. package/dist/tests/issue-36-repro.test.js +0 -304
  131. package/dist/tests/issues-191-194.test.js +0 -160
  132. package/dist/tests/lesson-lint.test.js +0 -111
  133. package/dist/tests/llm-client.test.js +0 -115
  134. package/dist/tests/llm-feature-gate.test.js +0 -151
  135. package/dist/tests/llm.test.js +0 -139
  136. package/dist/tests/lockfile.test.js +0 -216
  137. package/dist/tests/manifest.test.js +0 -205
  138. package/dist/tests/markdown.test.js +0 -126
  139. package/dist/tests/matchers-unit.test.js +0 -189
  140. package/dist/tests/memory-inference.test.js +0 -299
  141. package/dist/tests/merge-scoring.test.js +0 -136
  142. package/dist/tests/metadata.test.js +0 -313
  143. package/dist/tests/migration-help.test.js +0 -89
  144. package/dist/tests/origin-resolve.test.js +0 -124
  145. package/dist/tests/output-baseline.test.js +0 -218
  146. package/dist/tests/output-shapes-unit.test.js +0 -478
  147. package/dist/tests/parallel-search.test.js +0 -272
  148. package/dist/tests/parameter-metadata.test.js +0 -365
  149. package/dist/tests/paths.test.js +0 -177
  150. package/dist/tests/progressive-disclosure.test.js +0 -280
  151. package/dist/tests/proposals.test.js +0 -279
  152. package/dist/tests/proposed-quality.test.js +0 -271
  153. package/dist/tests/provider-registry.test.js +0 -32
  154. package/dist/tests/ranking-regression.test.js +0 -548
  155. package/dist/tests/reflect-propose.test.js +0 -455
  156. package/dist/tests/registry-build-index.test.js +0 -394
  157. package/dist/tests/registry-cli.test.js +0 -290
  158. package/dist/tests/registry-index-v2.test.js +0 -430
  159. package/dist/tests/registry-install.test.js +0 -728
  160. package/dist/tests/registry-providers/parity.test.js +0 -189
  161. package/dist/tests/registry-providers/skills-sh.test.js +0 -309
  162. package/dist/tests/registry-providers/static-index.test.js +0 -238
  163. package/dist/tests/registry-resolve.test.js +0 -126
  164. package/dist/tests/registry-search.test.js +0 -923
  165. package/dist/tests/remember-frontmatter.test.js +0 -378
  166. package/dist/tests/remember-unit.test.js +0 -123
  167. package/dist/tests/ripgrep-install.test.js +0 -251
  168. package/dist/tests/ripgrep-resolve.test.js +0 -108
  169. package/dist/tests/ripgrep.test.js +0 -163
  170. package/dist/tests/save-command.test.js +0 -94
  171. package/dist/tests/save-trust-qa-fixes.test.js +0 -270
  172. package/dist/tests/scoring-pipeline.test.js +0 -648
  173. package/dist/tests/search-include-proposed-cli.test.js +0 -118
  174. package/dist/tests/self-update.test.js +0 -442
  175. package/dist/tests/semantic-search-e2e.test.js +0 -512
  176. package/dist/tests/semantic-status.test.js +0 -471
  177. package/dist/tests/setup-run.integration.js +0 -877
  178. package/dist/tests/setup-wizard.test.js +0 -198
  179. package/dist/tests/setup.test.js +0 -131
  180. package/dist/tests/source-add.test.js +0 -11
  181. package/dist/tests/source-clone.test.js +0 -254
  182. package/dist/tests/source-manage.test.js +0 -366
  183. package/dist/tests/source-providers/filesystem.test.js +0 -82
  184. package/dist/tests/source-providers/git.test.js +0 -252
  185. package/dist/tests/source-providers/website.test.js +0 -128
  186. package/dist/tests/source-qa-fixes.test.js +0 -286
  187. package/dist/tests/source-registry.test.js +0 -350
  188. package/dist/tests/source-resolve.test.js +0 -100
  189. package/dist/tests/source-source.test.js +0 -281
  190. package/dist/tests/source.test.js +0 -533
  191. package/dist/tests/tar-utils-scan.test.js +0 -73
  192. package/dist/tests/toggle-components.test.js +0 -73
  193. package/dist/tests/usage-telemetry.test.js +0 -265
  194. package/dist/tests/utility-scoring.test.js +0 -558
  195. package/dist/tests/vault-load-error.test.js +0 -78
  196. package/dist/tests/vault-qa-fixes.test.js +0 -194
  197. package/dist/tests/vault.test.js +0 -429
  198. package/dist/tests/vector-search.test.js +0 -608
  199. package/dist/tests/walker.test.js +0 -252
  200. package/dist/tests/wave2-cluster-bc.test.js +0 -228
  201. package/dist/tests/wave2-cluster-d.test.js +0 -180
  202. package/dist/tests/wave2-cluster-e.test.js +0 -179
  203. package/dist/tests/wiki-qa-fixes.test.js +0 -270
  204. package/dist/tests/wiki.test.js +0 -529
  205. package/dist/tests/workflow-cli.test.js +0 -271
  206. package/dist/tests/workflow-markdown.test.js +0 -171
  207. package/dist/tests/workflow-path-escape.test.js +0 -132
  208. package/dist/tests/workflow-qa-fixes.test.js +0 -395
  209. package/dist/tests/workflows/indexer-rejection.test.js +0 -213
  210. /package/dist/{src/cli.js → cli.js} +0 -0
  211. /package/dist/{src/commands → commands}/completions.js +0 -0
  212. /package/dist/{src/commands → commands}/config-cli.js +0 -0
  213. /package/dist/{src/commands → commands}/curate.js +0 -0
  214. /package/dist/{src/commands → commands}/distill.js +0 -0
  215. /package/dist/{src/commands → commands}/events.js +0 -0
  216. /package/dist/{src/commands → commands}/history.js +0 -0
  217. /package/dist/{src/commands → commands}/info.js +0 -0
  218. /package/dist/{src/commands → commands}/init.js +0 -0
  219. /package/dist/{src/commands → commands}/install-audit.js +0 -0
  220. /package/dist/{src/commands → commands}/installed-stashes.js +0 -0
  221. /package/dist/{src/commands → commands}/migration-help.js +0 -0
  222. /package/dist/{src/commands → commands}/proposal.js +0 -0
  223. /package/dist/{src/commands → commands}/propose.js +0 -0
  224. /package/dist/{src/commands → commands}/reflect.js +0 -0
  225. /package/dist/{src/commands → commands}/registry-search.js +0 -0
  226. /package/dist/{src/commands → commands}/remember.js +0 -0
  227. /package/dist/{src/commands → commands}/search.js +0 -0
  228. /package/dist/{src/commands → commands}/self-update.js +0 -0
  229. /package/dist/{src/commands → commands}/show.js +0 -0
  230. /package/dist/{src/commands → commands}/source-add.js +0 -0
  231. /package/dist/{src/commands → commands}/source-clone.js +0 -0
  232. /package/dist/{src/commands → commands}/source-manage.js +0 -0
  233. /package/dist/{src/commands → commands}/vault.js +0 -0
  234. /package/dist/{src/core → core}/asset-ref.js +0 -0
  235. /package/dist/{src/core → core}/asset-registry.js +0 -0
  236. /package/dist/{src/core → core}/asset-spec.js +0 -0
  237. /package/dist/{src/core → core}/common.js +0 -0
  238. /package/dist/{src/core → core}/config.js +0 -0
  239. /package/dist/{src/core → core}/errors.js +0 -0
  240. /package/dist/{src/core → core}/events.js +0 -0
  241. /package/dist/{src/core → core}/frontmatter.js +0 -0
  242. /package/dist/{src/core → core}/lesson-lint.js +0 -0
  243. /package/dist/{src/core → core}/markdown.js +0 -0
  244. /package/dist/{src/core → core}/paths.js +0 -0
  245. /package/dist/{src/core → core}/proposals.js +0 -0
  246. /package/dist/{src/core → core}/warn.js +0 -0
  247. /package/dist/{src/core → core}/write-source.js +0 -0
  248. /package/dist/{src/indexer → indexer}/db-search.js +0 -0
  249. /package/dist/{src/indexer → indexer}/db.js +0 -0
  250. /package/dist/{src/indexer → indexer}/file-context.js +0 -0
  251. /package/dist/{src/indexer → indexer}/graph-boost.js +0 -0
  252. /package/dist/{src/indexer → indexer}/graph-extraction.js +0 -0
  253. /package/dist/{src/indexer → indexer}/indexer.js +0 -0
  254. /package/dist/{src/indexer → indexer}/manifest.js +0 -0
  255. /package/dist/{src/indexer → indexer}/matchers.js +0 -0
  256. /package/dist/{src/indexer → indexer}/memory-inference.js +0 -0
  257. /package/dist/{src/indexer → indexer}/metadata.js +0 -0
  258. /package/dist/{src/indexer → indexer}/search-fields.js +0 -0
  259. /package/dist/{src/indexer → indexer}/search-source.js +0 -0
  260. /package/dist/{src/indexer → indexer}/semantic-status.js +0 -0
  261. /package/dist/{src/indexer → indexer}/usage-events.js +0 -0
  262. /package/dist/{src/indexer → indexer}/walker.js +0 -0
  263. /package/dist/{src/integrations → integrations}/agent/config.js +0 -0
  264. /package/dist/{src/integrations → integrations}/agent/detect.js +0 -0
  265. /package/dist/{src/integrations → integrations}/agent/index.js +0 -0
  266. /package/dist/{src/integrations → integrations}/agent/profiles.js +0 -0
  267. /package/dist/{src/integrations → integrations}/agent/prompts.js +0 -0
  268. /package/dist/{src/integrations → integrations}/agent/spawn.js +0 -0
  269. /package/dist/{src/integrations → integrations}/github.js +0 -0
  270. /package/dist/{src/integrations → integrations}/lockfile.js +0 -0
  271. /package/dist/{src/llm → llm}/client.js +0 -0
  272. /package/dist/{src/llm → llm}/embedder.js +0 -0
  273. /package/dist/{src/llm → llm}/embedders/cache.js +0 -0
  274. /package/dist/{src/llm → llm}/embedders/local.js +0 -0
  275. /package/dist/{src/llm → llm}/embedders/remote.js +0 -0
  276. /package/dist/{src/llm → llm}/embedders/types.js +0 -0
  277. /package/dist/{src/llm → llm}/feature-gate.js +0 -0
  278. /package/dist/{src/llm → llm}/graph-extract.js +0 -0
  279. /package/dist/{src/llm → llm}/index-passes.js +0 -0
  280. /package/dist/{src/llm → llm}/memory-infer.js +0 -0
  281. /package/dist/{src/llm → llm}/metadata-enhance.js +0 -0
  282. /package/dist/{src/output → output}/cli-hints.js +0 -0
  283. /package/dist/{src/output → output}/context.js +0 -0
  284. /package/dist/{src/output → output}/renderers.js +0 -0
  285. /package/dist/{src/output → output}/shapes.js +0 -0
  286. /package/dist/{src/output → output}/text.js +0 -0
  287. /package/dist/{src/registry → registry}/build-index.js +0 -0
  288. /package/dist/{src/registry → registry}/create-provider-registry.js +0 -0
  289. /package/dist/{src/registry → registry}/factory.js +0 -0
  290. /package/dist/{src/registry → registry}/origin-resolve.js +0 -0
  291. /package/dist/{src/registry → registry}/providers/index.js +0 -0
  292. /package/dist/{src/registry → registry}/providers/skills-sh.js +0 -0
  293. /package/dist/{src/registry → registry}/providers/static-index.js +0 -0
  294. /package/dist/{src/registry → registry}/providers/types.js +0 -0
  295. /package/dist/{src/registry → registry}/resolve.js +0 -0
  296. /package/dist/{src/registry → registry}/types.js +0 -0
  297. /package/dist/{src/setup → setup}/detect.js +0 -0
  298. /package/dist/{src/setup → setup}/ripgrep-install.js +0 -0
  299. /package/dist/{src/setup → setup}/ripgrep-resolve.js +0 -0
  300. /package/dist/{src/setup → setup}/setup.js +0 -0
  301. /package/dist/{src/setup → setup}/steps.js +0 -0
  302. /package/dist/{src/sources → sources}/include.js +0 -0
  303. /package/dist/{src/sources → sources}/provider-factory.js +0 -0
  304. /package/dist/{src/sources → sources}/provider.js +0 -0
  305. /package/dist/{src/sources → sources}/providers/filesystem.js +0 -0
  306. /package/dist/{src/sources → sources}/providers/git.js +0 -0
  307. /package/dist/{src/sources → sources}/providers/index.js +0 -0
  308. /package/dist/{src/sources → sources}/providers/install-types.js +0 -0
  309. /package/dist/{src/sources → sources}/providers/npm.js +0 -0
  310. /package/dist/{src/sources → sources}/providers/provider-utils.js +0 -0
  311. /package/dist/{src/sources → sources}/providers/sync-from-ref.js +0 -0
  312. /package/dist/{src/sources → sources}/providers/tar-utils.js +0 -0
  313. /package/dist/{src/sources → sources}/providers/website.js +0 -0
  314. /package/dist/{src/sources → sources}/resolve.js +0 -0
  315. /package/dist/{src/sources → sources}/types.js +0 -0
  316. /package/dist/{src/templates → templates}/wiki-templates.js +0 -0
  317. /package/dist/{src/version.js → version.js} +0 -0
  318. /package/dist/{src/wiki → wiki}/wiki.js +0 -0
  319. /package/dist/{src/workflows → workflows}/authoring.js +0 -0
  320. /package/dist/{src/workflows → workflows}/cli.js +0 -0
  321. /package/dist/{src/workflows → workflows}/db.js +0 -0
  322. /package/dist/{src/workflows → workflows}/document-cache.js +0 -0
  323. /package/dist/{src/workflows → workflows}/parser.js +0 -0
  324. /package/dist/{src/workflows → workflows}/renderer.js +0 -0
  325. /package/dist/{src/workflows → workflows}/runs.js +0 -0
  326. /package/dist/{src/workflows → workflows}/schema.js +0 -0
  327. /package/dist/{src/workflows → workflows}/validator.js +0 -0
@@ -1,355 +0,0 @@
1
- /**
2
- * akm-bench run-config loader.
3
- *
4
- * A bench run config (`tests/bench/configs/*.json`) is a single-file
5
- * description of a utility/evolve invocation: providers, default model,
6
- * tasks, arms, seeds, budgets, parallel, baseline. Loading a config
7
- * resolves the providers file (from explicit `providers` / `providersRef`
8
- * fields, the `BENCH_OPENCODE_CONFIG` env var, or
9
- * `${XDG_CONFIG_HOME:-~/.config}/akm/bench-providers.json`), looks up the
10
- * effective default model, and resolves the task selector + baseline file
11
- * paths.
12
- *
13
- * Self-contained — does not import from `src/` so the bench framework
14
- * stays liftable to a standalone repo.
15
- */
16
- import fs from "node:fs";
17
- import os from "node:os";
18
- import path from "node:path";
19
- import { listTasks, loadTask } from "./corpus";
20
- import { BenchConfigError, loadOpencodeProviders, } from "./opencode-config";
21
- import { benchMkdtemp } from "./tmp";
22
/**
 * Resolve a path string supporting `~` expansion and `${VAR}` / `$VAR`
 * env-var expansion. Relative paths are resolved against `baseDir`.
 *
 * Environment references are substituted in a single pass, so the text a
 * variable expands to is never re-scanned for further `$NAME` references
 * (a value that itself contains `$FOO` is kept literally). Variables that
 * are not set expand to the empty string.
 *
 * @param {string} value   Raw path as written in a config file.
 * @param {string} baseDir Directory that relative results are resolved against.
 * @returns {string} An absolute path.
 */
export function resolvePathString(value, baseDir) {
    // One combined pattern: group 1 captures `${NAME}`, group 2 captures `$NAME`.
    const expanded = value.replace(
        /\$\{([A-Za-z_][A-Za-z0-9_]*)\}|\$([A-Za-z_][A-Za-z0-9_]*)/g,
        (_match, braced, bare) => process.env[braced ?? bare] ?? "",
    );
    // Tilde expansion. `~` alone or `~/...`; `~user/` is not supported.
    let candidate = expanded;
    if (expanded === "~") {
        candidate = os.homedir();
    }
    else if (expanded.startsWith("~/")) {
        candidate = path.join(os.homedir(), expanded.slice(2));
    }
    return path.isAbsolute(candidate) ? candidate : path.resolve(baseDir, candidate);
}
40
/**
 * Default per-operator providers location:
 * `${XDG_CONFIG_HOME:-~/.config}/akm/bench-providers.json`.
 *
 * An unset or empty `XDG_CONFIG_HOME` falls back to `<home>/.config`.
 *
 * @returns {string} Path to `bench-providers.json` under the chosen config root.
 */
export function defaultUserProvidersPath() {
    const { XDG_CONFIG_HOME: xdgHome } = process.env;
    let configRoot;
    if (xdgHome) {
        configRoot = xdgHome;
    }
    else {
        configRoot = path.join(os.homedir(), ".config");
    }
    return path.join(configRoot, "akm", "bench-providers.json");
}
46
/**
 * Locate the providers definition for a bench run and load it.
 *
 * Sources are tried in this order; the first one that applies wins:
 *
 * 1. `BENCH_OPENCODE_CONFIG` env var (a relative value is resolved against
 *    the current working directory).
 * 2. `providers` inline in the config (handed to
 *    `materialiseInlineProviders`). Setting `providersRef` as well is an
 *    error.
 * 3. `providersRef` in the config (expanded via `resolvePathString`
 *    relative to `configDir`).
 * 4. The per-operator file returned by `defaultUserProvidersPath()`, if it
 *    exists.
 * 5. Repo-local fallbacks under `../fixtures/bench/` relative to this
 *    module: the `.local.json` overlay is checked before the plain
 *    `opencode-providers.json` fixture.
 * 6. Otherwise throw a `BenchConfigError`.
 *
 * @param config    Parsed bench run config.
 * @param configDir Directory used to resolve a relative `providersRef`.
 * @returns Whatever the selected loader returns (`loadOpencodeProviders`
 *          or `materialiseInlineProviders`).
 * @throws {BenchConfigError} When both `providers` and `providersRef` are
 *         set (with no env override), or when no source is found.
 */
export function resolveProviders(config, configDir) {
    // 1. Environment override beats everything in the config file.
    const envOverride = process.env.BENCH_OPENCODE_CONFIG;
    if (envOverride) {
        const overridePath = path.isAbsolute(envOverride) ? envOverride : path.resolve(envOverride);
        return loadOpencodeProviders(overridePath);
    }
    const hasInline = config.providers !== undefined;
    const hasRef = config.providersRef !== undefined;
    // 2. Inline providers; mutually exclusive with providersRef.
    if (hasInline) {
        if (hasRef) {
            throw new BenchConfigError("bench run config: only one of `providers` or `providersRef` may be set", true);
        }
        return materialiseInlineProviders(config);
    }
    // 3. Explicit reference to a providers file.
    if (hasRef) {
        return loadOpencodeProviders(resolvePathString(config.providersRef, configDir));
    }
    // 4. Per-operator default location.
    const userPath = defaultUserProvidersPath();
    if (fs.existsSync(userPath)) {
        return loadOpencodeProviders(userPath);
    }
    // 5. Repo-local fallbacks. The gitignored `.local.json` overlay is listed
    //    first so it wins over the committed fixture. Paths are built lazily,
    //    only once the earlier sources have been ruled out.
    for (const fixtureName of ["opencode-providers.local.json", "opencode-providers.json"]) {
        const fixturePath = path.resolve(__dirname, "..", "fixtures", "bench", fixtureName);
        if (fs.existsSync(fixturePath)) {
            return loadOpencodeProviders(fixturePath);
        }
    }
    // 6. Nothing found anywhere.
    throw new BenchConfigError(`bench run config: no opencode providers found. Set \`providers\` or \`providersRef\` in the config, set BENCH_OPENCODE_CONFIG, or create ${userPath}.`, true);
}
97
/**
 * Load an inline `providers` map through the same loader used for on-disk
 * provider files.
 *
 * The inline map (plus `defaultModel`, when present) is wrapped in a
 * `schemaVersion: 1` document, written to a short-lived file created with
 * mode 0o600 inside a bench temp directory, passed to
 * `loadOpencodeProviders`, and the directory is removed again whether or
 * not loading succeeds. Going through the loader keeps its validation in
 * one place instead of duplicating it here.
 *
 * @param config Parsed bench run config whose `providers` field is set.
 * @returns The loader's result with `source` overridden to `"<inline>"`.
 * @throws {BenchConfigError} When `providers` is not a plain object.
 */
function materialiseInlineProviders(config) {
    const inlineProviders = config.providers;
    const isPlainObject = inlineProviders !== null &&
        typeof inlineProviders === "object" &&
        !Array.isArray(inlineProviders);
    if (!isPlainObject) {
        throw new BenchConfigError("bench run config: `providers` must be an object", false);
    }
    // Synthetic providers document; `defaultModel` is only included when set.
    const payload = { schemaVersion: 1, providers: inlineProviders };
    if (config.defaultModel !== undefined) {
        payload.defaultModel = config.defaultModel;
    }
    // Per #276: bench tmp dirs MUST live under `${AKM_CACHE_DIR}/bench/`,
    // never the OS-default tmp root. `benchMkdtemp` is the drop-in.
    const scratchDir = benchMkdtemp("akm-bench-inline-");
    const scratchFile = path.join(scratchDir, "providers.json");
    try {
        fs.writeFileSync(scratchFile, JSON.stringify(payload), { mode: 0o600 });
        return { ...loadOpencodeProviders(scratchFile), source: "<inline>" };
    }
    finally {
        try {
            fs.rmSync(scratchDir, { recursive: true, force: true });
        }
        catch {
            // best-effort cleanup
        }
    }
}
138
/**
 * Load + validate a baseline JSON file: `{ taskId: passRate (0..1) }`.
 *
 * Every entry must be a finite number in [0, 1]; the first offending entry
 * aborts the load. The result is built with `Object.fromEntries`, so every
 * key from the file becomes an own property. Plain assignment into `{}`
 * would silently drop a key named `__proto__`.
 *
 * @param {string} absPath Path of the baseline file to read.
 * @returns {Record<string, number>} Map of task id to pass rate.
 * @throws {BenchConfigError} When the file cannot be read, is not valid
 *         JSON, is not a JSON object, or contains an out-of-range entry.
 */
export function loadBaseline(absPath) {
    let raw;
    try {
        raw = fs.readFileSync(absPath, "utf8");
    }
    catch (err) {
        throw new BenchConfigError(`bench run config: cannot read baseline file "${absPath}": ${err instanceof Error ? err.message : String(err)}`, true);
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (err) {
        throw new BenchConfigError(`bench run config: baseline file "${absPath}" is not valid JSON: ${err instanceof Error ? err.message : String(err)}`, false);
    }
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        throw new BenchConfigError(`bench run config: baseline file "${absPath}" must be a JSON object of taskId → passRate`, false);
    }
    const entries = Object.entries(parsed);
    for (const [key, value] of entries) {
        const isPassRate = typeof value === "number" && Number.isFinite(value) && value >= 0 && value <= 1;
        if (!isPassRate) {
            throw new BenchConfigError(`bench run config: baseline entry ${JSON.stringify(key)} in "${absPath}" must be a number in [0, 1]; got ${JSON.stringify(value)}`, false);
        }
    }
    // fromEntries defines own data properties, so no key can hit a prototype setter.
    return Object.fromEntries(entries);
}
166
/**
 * Turn the config's `tasks` selector into concrete task metadata plus the
 * slice label used for the report's `corpus.slice` field.
 *
 * Accepted selectors:
 * - omitted (`undefined`): every task, slice `"all"`.
 * - `"all"`, `"train"`, `"eval"`: that slice of the corpus; the slice label
 *   is the selector itself.
 * - a string containing `/`: a single task id, loaded directly.
 * - any other string: a domain name, matched against each task's `domain`.
 * - an array: a list of task ids, loaded one by one in order.
 *
 * @param selector The `tasks` value from the run config, if any.
 * @returns `{ tasks, slice }`.
 * @throws {BenchConfigError} When nothing matches the selector or the
 *         array is empty.
 */
export function resolveTasks(selector) {
    // Field omitted entirely: behave like "all".
    if (selector === undefined) {
        return { tasks: listTasks(), slice: "all" };
    }
    // Anything that is not a string is treated as a list of task ids.
    if (typeof selector !== "string") {
        if (selector.length === 0) {
            throw new BenchConfigError("bench run config: tasks: array must be non-empty", true);
        }
        const resolved = [];
        for (const taskId of selector) {
            let task;
            try {
                task = loadTask(taskId);
            }
            catch {
                throw new BenchConfigError(`bench run config: tasks: no task matched "${taskId}"`, true);
            }
            resolved.push(task);
        }
        return { tasks: resolved, slice: "all" };
    }
    // Named slices.
    switch (selector) {
        case "all":
            return { tasks: listTasks({}), slice: selector };
        case "train":
        case "eval":
            return { tasks: listTasks({ slice: selector }), slice: selector };
        default:
            break;
    }
    // Single task id ("domain/name"): any lookup failure is reported as "no match".
    if (selector.includes("/")) {
        let task;
        try {
            task = loadTask(selector);
        }
        catch {
            throw new BenchConfigError(`bench run config: tasks: no task matched "${selector}"`, true);
        }
        return { tasks: [task], slice: "all" };
    }
    // Bare word: treat it as a domain name.
    const everyTask = listTasks();
    const inDomain = everyTask.filter((task) => task.domain === selector);
    if (inDomain.length > 0) {
        return { tasks: inDomain, slice: "all" };
    }
    const knownDomains = [...new Set(everyTask.map((task) => task.domain))].sort().join(", ") || "(none)";
    throw new BenchConfigError(`bench run config: tasks: no task matched domain "${selector}". Available domains: ${knownDomains}`, true);
}
214
/**
 * Check a parsed run config against the v1 schema using hand-written
 * checks (no JSON Schema runtime, which keeps the bench self-contained).
 *
 * Checks run in a fixed order and the first violation throws: root shape,
 * `schemaVersion`, unknown fields, `providers`/`providersRef` exclusivity,
 * `tasks`, `arms`, then the positive-integer fields.
 *
 * @param parsed Result of `JSON.parse` on the config file.
 * @param source Label for the config (used in error messages).
 * @returns The same object that was passed in, once it has passed all checks.
 * @throws {BenchConfigError} On the first schema violation.
 */
function validateConfig(parsed, source) {
    const isJsonObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    if (!isJsonObject) {
        throw new BenchConfigError(`bench run config: root of ${source} must be a JSON object`, false);
    }
    const config = parsed;
    if (config.schemaVersion !== 1) {
        throw new BenchConfigError(`bench run config: ${source}: unsupported schemaVersion ${JSON.stringify(config.schemaVersion)}; expected 1`, false);
    }
    // Reject anything outside the known field list.
    const knownFields = new Set([
        "$schema",
        "schemaVersion",
        "name",
        "description",
        "providers",
        "providersRef",
        "defaultModel",
        "tasks",
        "arms",
        "seeds",
        "budgetTokens",
        "budgetWallMs",
        "parallel",
        "forceParallel",
        "baseline",
    ]);
    const unknownField = Object.keys(config).find((field) => !knownFields.has(field));
    if (unknownField !== undefined) {
        throw new BenchConfigError(`bench run config: ${source}: unknown field "${unknownField}"`, false);
    }
    const hasInlineProviders = config.providers !== undefined;
    const hasProvidersRef = config.providersRef !== undefined;
    if (hasInlineProviders && hasProvidersRef) {
        throw new BenchConfigError(`bench run config: ${source}: only one of "providers" or "providersRef" may be set`, true);
    }
    const { tasks, arms } = config;
    // `tasks`: a single string, or an array whose entries are all strings.
    if (tasks !== undefined) {
        const tasksIsArray = Array.isArray(tasks);
        if (!tasksIsArray && typeof tasks !== "string") {
            throw new BenchConfigError(`bench run config: ${source}: "tasks" must be a string or array of strings`, false);
        }
        if (tasksIsArray) {
            for (const entry of tasks) {
                if (typeof entry !== "string") {
                    throw new BenchConfigError(`bench run config: ${source}: every entry in "tasks" must be a string`, false);
                }
            }
        }
    }
    // `arms`: non-empty array drawn from the fixed set of arm names.
    if (arms !== undefined) {
        if (!(Array.isArray(arms) && arms.length > 0)) {
            throw new BenchConfigError(`bench run config: ${source}: "arms" must be a non-empty array`, false);
        }
        const validArms = new Set(["noakm", "akm", "synthetic"]);
        for (const arm of arms) {
            if (!validArms.has(arm)) {
                throw new BenchConfigError(`bench run config: ${source}: invalid arm ${JSON.stringify(arm)}; expected one of "noakm", "akm", "synthetic"`, false);
            }
        }
    }
    // Numeric knobs: each, when present, must be an integer >= 1.
    const isPositiveInteger = (candidate) => Number.isInteger(candidate) && candidate >= 1;
    for (const fieldName of ["seeds", "budgetTokens", "budgetWallMs", "parallel"]) {
        const fieldValue = config[fieldName];
        if (fieldValue === undefined) {
            continue;
        }
        if (!isPositiveInteger(fieldValue)) {
            throw new BenchConfigError(`bench run config: ${source}: "${fieldName}" must be a positive integer`, false);
        }
    }
    return config;
}
284
/**
 * Read a bench run config from disk and resolve it into a run description.
 *
 * The file is read and JSON-parsed, validated with `validateConfig`, and then
 * resolved: providers via `resolveProviders`, the model (the
 * BENCH_OPENCODE_MODEL env var when non-empty, else the config's
 * `defaultModel`, else the providers' `defaultModel`), the task selection via
 * `resolveTasks` (optionally narrowed by `overrides.tasksList`), and the
 * baseline map when the config names a `baseline` file.
 *
 * @param configPath Absolute or relative path to the config JSON file.
 * @param overrides  CLI-derived overrides applied on top of the config
 *                   (`tasksList`, `seedsPerArm`, `parallel`).
 * @returns An object with `source`, `name`, `providers`, `model`, `tasks`,
 *          `arms`, `slice`, plus `seedsPerArm`, `budgetTokens`,
 *          `budgetWallMs`, `parallel`, `forceParallel` and `baselineByTaskId`
 *          only when they are set.
 * @throws BenchConfigError when the file is missing, unreadable, not valid
 *         JSON, fails validation, yields no model, or selects zero tasks.
 */
export function loadBenchRunConfig(configPath, overrides = {}) {
    const sourcePath = path.isAbsolute(configPath) ? configPath : path.resolve(configPath);
    const reasonOf = (err) => (err instanceof Error ? err.message : String(err));
    if (!fs.existsSync(sourcePath)) {
        throw new BenchConfigError(`bench run config: file not found: ${sourcePath}`, true);
    }
    let text;
    try {
        text = fs.readFileSync(sourcePath, "utf8");
    }
    catch (err) {
        throw new BenchConfigError(`bench run config: cannot read ${sourcePath}: ${reasonOf(err)}`, true);
    }
    let json;
    try {
        json = JSON.parse(text);
    }
    catch (err) {
        throw new BenchConfigError(`bench run config: ${sourcePath}: invalid JSON: ${reasonOf(err)}`, false);
    }
    const config = validateConfig(json, sourcePath);
    const configDir = path.dirname(sourcePath);
    const providers = resolveProviders(config, configDir);
    // An unset or empty env var counts as "not provided".
    const modelFromEnv = process.env.BENCH_OPENCODE_MODEL || undefined;
    const model = modelFromEnv ?? config.defaultModel ?? providers.defaultModel;
    if (!model) {
        throw new BenchConfigError(`bench run config: ${sourcePath}: no model specified. Set "defaultModel" in the config, set "defaultModel" in the providers file, or set BENCH_OPENCODE_MODEL.`, true);
    }
    // Resolve the config's selection, then narrow it with the CLI list if given.
    let selection = resolveTasks(config.tasks);
    const requested = overrides.tasksList;
    if (requested && requested.length > 0) {
        const requestedIds = new Set(requested);
        const kept = selection.tasks.filter((task) => requestedIds.has(task.id));
        const unmatched = requested.filter((id) => selection.tasks.every((task) => task.id !== id));
        if (unmatched.length > 0) {
            throw new BenchConfigError(`bench run config: --tasks override: no task in the config matched ${JSON.stringify(unmatched.join(", "))}`, true);
        }
        selection = { tasks: kept, slice: selection.slice };
    }
    if (selection.tasks.length === 0) {
        throw new BenchConfigError(`bench run config: ${sourcePath}: task selector matched zero tasks`, true);
    }
    // The baseline path is resolved against the config file's directory.
    const baselineByTaskId = config.baseline
        ? loadBaseline(resolvePathString(config.baseline, configDir))
        : undefined;
    const arms = config.arms ?? ["noakm", "akm"];
    const seedsPerArm = overrides.seedsPerArm ?? config.seeds;
    const parallel = overrides.parallel ?? config.parallel;
    const name = config.name ?? path.basename(sourcePath, path.extname(sourcePath));
    // Optional keys are added only when set, in the same order as before.
    const resolvedRun = {
        source: sourcePath,
        name,
        providers,
        model,
        tasks: selection.tasks,
        arms,
    };
    if (seedsPerArm !== undefined) {
        resolvedRun.seedsPerArm = seedsPerArm;
    }
    if (config.budgetTokens !== undefined) {
        resolvedRun.budgetTokens = config.budgetTokens;
    }
    if (config.budgetWallMs !== undefined) {
        resolvedRun.budgetWallMs = config.budgetWallMs;
    }
    if (parallel !== undefined) {
        resolvedRun.parallel = parallel;
    }
    if (config.forceParallel) {
        resolvedRun.forceParallel = true;
    }
    if (baselineByTaskId) {
        resolvedRun.baselineByTaskId = baselineByTaskId;
    }
    resolvedRun.slice = selection.slice;
    return resolvedRun;
}
@@ -1,298 +0,0 @@
1
- /**
2
- * Unit tests for the bench run-config loader (`tests/bench/run-config.ts`).
3
- *
4
- * Covers the parts that don't require spawning a process:
5
- * - Schema validation (unknown fields, missing schemaVersion, bad arms).
6
- * - Path resolution (~ expansion, ${VAR} expansion, relative vs absolute).
7
- * - Provider discovery chain (env > inline > providersRef > XDG default).
8
- * - Baseline-file loading + range checks.
9
- * - Task selector resolution (slice / domain / id / array).
10
- *
11
- * The CLI-level dispatch is exercised by `cli.test.ts` via spawned bench
12
- * runs — keep those for end-to-end coverage; this file is unit-grade.
13
- */
14
- import { afterEach, beforeEach, describe, expect, test } from "bun:test";
15
- import fs from "node:fs";
16
- import os from "node:os";
17
- import path from "node:path";
18
- import { defaultUserProvidersPath, loadBaseline, loadBenchRunConfig, resolvePathString } from "./run-config";
19
- import { benchMkdtemp } from "./tmp";
20
const REPO_ROOT = path.resolve(__dirname, "..", "..");
// Per-test scratch directory and the env values to put back afterwards.
let workDir;
let savedEnv;
beforeEach(() => {
    // Per #276 invariant: bench tmp dirs live under `${AKM_CACHE_DIR}/bench/`,
    // never the OS-default tmp root. `benchMkdtemp` is the drop-in.
    workDir = benchMkdtemp("akm-bench-runconfig-test-");
    // Snapshot every env var these tests may touch.
    const tracked = ["BENCH_OPENCODE_CONFIG", "BENCH_OPENCODE_MODEL", "AKM_TEST_VAR"];
    savedEnv = Object.fromEntries(tracked.map((key) => [key, process.env[key]]));
    // Start each test without the two bench overrides.
    delete process.env.BENCH_OPENCODE_CONFIG;
    delete process.env.BENCH_OPENCODE_MODEL;
});
afterEach(() => {
    fs.rmSync(workDir, { recursive: true, force: true });
    // Restore the snapshot: unset what was unset, reassign the rest.
    Object.entries(savedEnv).forEach(([key, value]) => {
        if (value === undefined) {
            delete process.env[key];
            return;
        }
        process.env[key] = value;
    });
});
44
/**
 * Write a minimal providers JSON file for tests, creating parent
 * directories as needed.
 *
 * @param filePath     Where to write the providers file.
 * @param defaultModel Value stored under `defaultModel` (defaults to "p/m").
 */
function writeProvidersFile(filePath, defaultModel = "p/m") {
    const payload = {
        schemaVersion: 1,
        defaultModel,
        providers: { p: { npm: "@ai-sdk/openai-compatible" } },
    };
    const parentDir = path.dirname(filePath);
    fs.mkdirSync(parentDir, { recursive: true });
    fs.writeFileSync(filePath, JSON.stringify(payload));
}
52
describe("resolvePathString", () => {
    // Base directory handed to every call in this suite.
    const baseDir = "/work";
    test("resolves a relative path against the supplied base dir", () => {
        const actual = resolvePathString("foo.json", baseDir);
        expect(actual).toBe("/work/foo.json");
    });
    test("returns absolute paths unchanged", () => {
        const actual = resolvePathString("/abs/path.json", baseDir);
        expect(actual).toBe("/abs/path.json");
    });
    test("expands `~` to the operator's home dir", () => {
        const expected = path.join(os.homedir(), ".config/akm/foo.json");
        expect(resolvePathString("~/.config/akm/foo.json", baseDir)).toBe(expected);
    });
    test("expands env-var references", () => {
        process.env.AKM_TEST_VAR = "/somewhere";
        // The "$" is kept apart from the "{…}" so no literal in this file
        // contains the dollar-brace form that biome's
        // noTemplateCurlyInString rule flags.
        const input = "$" + "{AKM_TEST_VAR}/providers.json";
        const actual = resolvePathString(input, baseDir);
        expect(actual).toBe("/somewhere/providers.json");
    });
});
70
describe("defaultUserProvidersPath", () => {
    // Run `body` with XDG_CONFIG_HOME set to `value` (or unset when `value`
    // is undefined), then put the previous value back even if `body` throws.
    const withXdgConfigHome = (value, body) => {
        const previous = process.env.XDG_CONFIG_HOME;
        if (value === undefined) {
            delete process.env.XDG_CONFIG_HOME;
        }
        else {
            process.env.XDG_CONFIG_HOME = value;
        }
        try {
            body();
        }
        finally {
            if (previous === undefined) {
                delete process.env.XDG_CONFIG_HOME;
            }
            else {
                process.env.XDG_CONFIG_HOME = previous;
            }
        }
    };
    test("respects XDG_CONFIG_HOME when set", () => {
        withXdgConfigHome("/xdg-test", () => {
            expect(defaultUserProvidersPath()).toBe("/xdg-test/akm/bench-providers.json");
        });
    });
    test("falls back to ~/.config when XDG_CONFIG_HOME is unset", () => {
        withXdgConfigHome(undefined, () => {
            const expected = path.join(os.homedir(), ".config/akm/bench-providers.json");
            expect(defaultUserProvidersPath()).toBe(expected);
        });
    });
});
96
describe("loadBaseline", () => {
    // Serialise `contents` into `fileName` under the per-test work dir and
    // return the full path.
    const writeBaseline = (fileName, contents) => {
        const target = path.join(workDir, fileName);
        fs.writeFileSync(target, JSON.stringify(contents));
        return target;
    };
    test("loads a `{ taskId: passRate }` map", () => {
        const rates = { "domain/a": 0.8, "domain/b": 1.0 };
        const filePath = writeBaseline("baseline.json", rates);
        expect(loadBaseline(filePath)).toEqual({ "domain/a": 0.8, "domain/b": 1.0 });
    });
    test("rejects pass rates outside [0, 1]", () => {
        const filePath = writeBaseline("bad.json", { "x/y": 1.5 });
        const attempt = () => loadBaseline(filePath);
        expect(attempt).toThrow(/must be a number in \[0, 1\]/);
    });
    test("rejects non-number values", () => {
        const filePath = writeBaseline("non-number.json", { "x/y": "not a number" });
        const attempt = () => loadBaseline(filePath);
        expect(attempt).toThrow(/must be a number/);
    });
});
113
describe("loadBenchRunConfig — schema validation", () => {
    // Write `contents` as JSON to `fileName` in the work dir and assert that
    // loading it throws an error matching `pattern`.
    const expectRejected = (fileName, contents, pattern) => {
        const cfgPath = path.join(workDir, fileName);
        fs.writeFileSync(cfgPath, JSON.stringify(contents));
        expect(() => loadBenchRunConfig(cfgPath)).toThrow(pattern);
    };
    test("rejects unknown top-level fields", () => {
        expectRejected("bad.json", { schemaVersion: 1, name: "x", weirdField: 42 }, /unknown field "weirdField"/);
    });
    test("rejects missing schemaVersion", () => {
        expectRejected("noversion.json", { name: "x" }, /unsupported schemaVersion/);
    });
    test("rejects providers AND providersRef both set", () => {
        const conflicting = {
            schemaVersion: 1,
            providers: { p: { npm: "x" } },
            providersRef: "./other.json",
        };
        expectRejected("both.json", conflicting, /only one of "providers" or "providersRef"/);
    });
    test("rejects bad arm values", () => {
        expectRejected("badarm.json", { schemaVersion: 1, arms: ["nope"] }, /invalid arm/);
    });
    test("missing config file exits with usage error", () => {
        // Nothing is written here: the path must not exist.
        const ghostPath = path.join(workDir, "ghost.json");
        expect(() => loadBenchRunConfig(ghostPath)).toThrow(/file not found/);
    });
});
142
describe("loadBenchRunConfig — provider discovery", () => {
    test("BENCH_OPENCODE_CONFIG env var wins over providersRef", () => {
        const envProviders = path.join(workDir, "env-providers.json");
        const refProviders = path.join(workDir, "ref-providers.json");
        writeProvidersFile(envProviders, "env/model");
        writeProvidersFile(refProviders, "ref/model");
        process.env.BENCH_OPENCODE_CONFIG = envProviders;
        const cfgPath = path.join(workDir, "config.json");
        // This test is only about provider resolution, but the loader throws
        // when the task selector matches zero tasks, so the config selects a
        // single task id. (An earlier draft first wrote a `tasks: "all"`
        // config to this same path and immediately overwrote it; that dead
        // write has been removed.)
        fs.writeFileSync(cfgPath, JSON.stringify({
            schemaVersion: 1,
            providersRef: "./ref-providers.json",
            tasks: ["drillbit/backup-policy"],
        }));
        const resolved = loadBenchRunConfig(cfgPath);
        expect(resolved.providers.source).toBe(envProviders);
        expect(resolved.model).toBe("env/model");
    });
    test("`providersRef` is resolved relative to the config file", () => {
        // The providers file sits in a subdirectory next to the config.
        const refProviders = path.join(workDir, "subdir", "providers.json");
        writeProvidersFile(refProviders, "ref/model");
        const cfgPath = path.join(workDir, "config.json");
        fs.writeFileSync(cfgPath, JSON.stringify({
            schemaVersion: 1,
            providersRef: "./subdir/providers.json",
            tasks: ["drillbit/backup-policy"],
        }));
        const resolved = loadBenchRunConfig(cfgPath);
        expect(resolved.providers.source).toBe(refProviders);
        expect(resolved.model).toBe("ref/model");
    });
    test("config `defaultModel` overrides the providers file's defaultModel", () => {
        const refProviders = path.join(workDir, "providers.json");
        writeProvidersFile(refProviders, "ref/model");
        const cfgPath = path.join(workDir, "config.json");
        fs.writeFileSync(cfgPath, JSON.stringify({
            schemaVersion: 1,
            providersRef: "./providers.json",
            defaultModel: "config/model",
            tasks: ["drillbit/backup-policy"],
        }));
        const resolved = loadBenchRunConfig(cfgPath);
        expect(resolved.model).toBe("config/model");
    });
    test("BENCH_OPENCODE_MODEL env wins over both", () => {
        const refProviders = path.join(workDir, "providers.json");
        writeProvidersFile(refProviders, "ref/model");
        // Restored by the file-level afterEach hook.
        process.env.BENCH_OPENCODE_MODEL = "env/model";
        const cfgPath = path.join(workDir, "config.json");
        fs.writeFileSync(cfgPath, JSON.stringify({
            schemaVersion: 1,
            providersRef: "./providers.json",
            defaultModel: "config/model",
            tasks: ["drillbit/backup-policy"],
        }));
        const resolved = loadBenchRunConfig(cfgPath);
        expect(resolved.model).toBe("env/model");
    });
});
210
describe("loadBenchRunConfig — task resolution", () => {
    // Write the default providers file plus a config.json that references it
    // and carries `extraFields`; returns the config path.
    const writeConfig = (extraFields) => {
        writeProvidersFile(path.join(workDir, "providers.json"));
        const cfgPath = path.join(workDir, "config.json");
        const body = { schemaVersion: 1, providersRef: "./providers.json", ...extraFields };
        fs.writeFileSync(cfgPath, JSON.stringify(body));
        return cfgPath;
    };
    const idsOf = (resolved) => resolved.tasks.map((t) => t.id);
    test("tasks=array selects exactly the listed ids", () => {
        const cfgPath = writeConfig({ tasks: ["drillbit/backup-policy", "drillbit/canary-enable"] });
        const ids = idsOf(loadBenchRunConfig(cfgPath));
        expect(ids.sort()).toEqual(["drillbit/backup-policy", "drillbit/canary-enable"]);
    });
    test("tasks=domain matches every task whose domain matches", () => {
        const cfgPath = writeConfig({ tasks: "drillbit" });
        const { tasks } = loadBenchRunConfig(cfgPath);
        expect(tasks.length).toBeGreaterThan(0);
        for (const task of tasks) {
            expect(task.domain).toBe("drillbit");
        }
    });
    test("tasks=single-id matches exactly that task", () => {
        const cfgPath = writeConfig({ tasks: "drillbit/backup-policy" });
        expect(idsOf(loadBenchRunConfig(cfgPath))).toEqual(["drillbit/backup-policy"]);
    });
    test("--tasks override (CLI) restricts to a subset of the config's selection", () => {
        const cfgPath = writeConfig({ tasks: ["drillbit/backup-policy", "drillbit/canary-enable"] });
        const overrides = { tasksList: ["drillbit/canary-enable"] };
        expect(idsOf(loadBenchRunConfig(cfgPath, overrides))).toEqual(["drillbit/canary-enable"]);
    });
    test("baseline path is resolved relative to the config file", () => {
        // The baseline file sits next to config.json and is referenced as "./baseline.json".
        const baselinePath = path.join(workDir, "baseline.json");
        fs.writeFileSync(baselinePath, JSON.stringify({ "drillbit/backup-policy": 0.8 }));
        const cfgPath = writeConfig({
            tasks: ["drillbit/backup-policy"],
            baseline: "./baseline.json",
        });
        const { baselineByTaskId } = loadBenchRunConfig(cfgPath);
        expect(baselineByTaskId).toEqual({ "drillbit/backup-policy": 0.8 });
    });
});
277
describe("loadBenchRunConfig — committed configs validate", () => {
    // Load one of the configs under tests/bench/configs by file name.
    const loadCommitted = (fileName) => {
        const cfgPath = path.join(REPO_ROOT, "tests", "bench", "configs", fileName);
        return loadBenchRunConfig(cfgPath);
    };
    test("tests/bench/configs/nano-quick.json loads cleanly", () => {
        const { name, arms, seedsPerArm, tasks } = loadCommitted("nano-quick.json");
        expect(name).toBe("nano-quick");
        expect(arms).toEqual(["akm"]);
        expect(seedsPerArm).toBe(2);
        expect(tasks.length).toBe(5);
    });
    test("tests/bench/configs/full.json loads cleanly and carries the baseline", () => {
        const resolved = loadCommitted("full.json");
        expect(resolved.name).toBe("full");
        expect(resolved.baselineByTaskId).toBeDefined();
        const sampleRate = resolved.baselineByTaskId?.["drillbit/backup-policy"];
        expect(typeof sampleRate).toBe("number");
    });
    test("tests/bench/configs/curate-test.json restricts to one task", () => {
        const ids = loadCommitted("curate-test.json").tasks.map((t) => t.id);
        expect(ids).toEqual(["inkwell/configure-scaling"]);
    });
});