akm-cli 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (332) hide show
  1. package/CHANGELOG.md +8 -0
  2. package/dist/{src/cli.js → cli.js} +22 -8
  3. package/dist/{src/commands → commands}/installed-stashes.js +1 -1
  4. package/dist/{src/commands → commands}/source-add.js +1 -1
  5. package/dist/{src/core → core}/common.js +16 -1
  6. package/dist/{src/core → core}/config.js +5 -2
  7. package/dist/{src/indexer → indexer}/db-search.js +16 -1
  8. package/dist/{src/indexer → indexer}/graph-extraction.js +5 -3
  9. package/dist/{src/indexer → indexer}/indexer.js +27 -11
  10. package/dist/{src/indexer → indexer}/memory-inference.js +47 -58
  11. package/dist/{src/indexer → indexer}/search-source.js +1 -1
  12. package/dist/{src/llm → llm}/client.js +61 -1
  13. package/dist/{src/llm → llm}/embedder.js +8 -5
  14. package/dist/{src/llm → llm}/embedders/local.js +8 -2
  15. package/dist/{src/llm → llm}/embedders/remote.js +4 -2
  16. package/dist/{src/llm → llm}/graph-extract.js +4 -4
  17. package/dist/llm/memory-infer.js +114 -0
  18. package/dist/{src/llm → llm}/metadata-enhance.js +2 -2
  19. package/dist/{src/output → output}/cli-hints.js +2 -0
  20. package/dist/{src/setup → setup}/setup.js +30 -20
  21. package/dist/sources/providers/website.js +27 -0
  22. package/dist/{src/sources/providers/website.js → sources/website-ingest.js} +38 -51
  23. package/docs/README.md +7 -0
  24. package/docs/migration/release-notes/0.7.0.md +14 -0
  25. package/package.json +11 -8
  26. package/dist/src/llm/memory-infer.js +0 -86
  27. package/dist/tests/add-website-source.test.js +0 -119
  28. package/dist/tests/agent/agent-config-loader.test.js +0 -70
  29. package/dist/tests/agent/agent-config.test.js +0 -221
  30. package/dist/tests/agent/agent-detect.test.js +0 -100
  31. package/dist/tests/agent/agent-spawn.test.js +0 -234
  32. package/dist/tests/agent-output.test.js +0 -186
  33. package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +0 -103
  34. package/dist/tests/architecture/agent-spawn-seam.test.js +0 -193
  35. package/dist/tests/architecture/llm-stateless-seam.test.js +0 -112
  36. package/dist/tests/asset-ref.test.js +0 -192
  37. package/dist/tests/asset-registry.test.js +0 -103
  38. package/dist/tests/asset-spec.test.js +0 -241
  39. package/dist/tests/bench/attribution.test.js +0 -996
  40. package/dist/tests/bench/cleanup-sigint.test.js +0 -83
  41. package/dist/tests/bench/cleanup.js +0 -234
  42. package/dist/tests/bench/cleanup.test.js +0 -166
  43. package/dist/tests/bench/cli.js +0 -1018
  44. package/dist/tests/bench/cli.test.js +0 -445
  45. package/dist/tests/bench/compare.test.js +0 -556
  46. package/dist/tests/bench/corpus.js +0 -317
  47. package/dist/tests/bench/corpus.test.js +0 -258
  48. package/dist/tests/bench/doctor.js +0 -525
  49. package/dist/tests/bench/driver.js +0 -401
  50. package/dist/tests/bench/driver.test.js +0 -584
  51. package/dist/tests/bench/environment.js +0 -233
  52. package/dist/tests/bench/environment.test.js +0 -199
  53. package/dist/tests/bench/evolve-metrics.js +0 -179
  54. package/dist/tests/bench/evolve-metrics.test.js +0 -187
  55. package/dist/tests/bench/evolve.js +0 -647
  56. package/dist/tests/bench/evolve.test.js +0 -624
  57. package/dist/tests/bench/failure-modes.test.js +0 -349
  58. package/dist/tests/bench/feedback-integrity.test.js +0 -457
  59. package/dist/tests/bench/leakage.test.js +0 -228
  60. package/dist/tests/bench/learning-curve.test.js +0 -134
  61. package/dist/tests/bench/metrics.js +0 -2395
  62. package/dist/tests/bench/metrics.test.js +0 -1150
  63. package/dist/tests/bench/no-os-tmpdir-invariant.test.js +0 -43
  64. package/dist/tests/bench/opencode-config.js +0 -194
  65. package/dist/tests/bench/opencode-config.test.js +0 -370
  66. package/dist/tests/bench/report.js +0 -1885
  67. package/dist/tests/bench/report.test.js +0 -1038
  68. package/dist/tests/bench/run-config.js +0 -355
  69. package/dist/tests/bench/run-config.test.js +0 -298
  70. package/dist/tests/bench/run-curate-test.js +0 -32
  71. package/dist/tests/bench/run-failing-tasks.js +0 -56
  72. package/dist/tests/bench/run-full-bench.js +0 -51
  73. package/dist/tests/bench/run-items36-targeted.js +0 -69
  74. package/dist/tests/bench/run-nano-quick.js +0 -42
  75. package/dist/tests/bench/run-waveg-targeted.js +0 -62
  76. package/dist/tests/bench/runner.js +0 -699
  77. package/dist/tests/bench/runner.test.js +0 -958
  78. package/dist/tests/bench/search-bridge.test.js +0 -331
  79. package/dist/tests/bench/tmp.js +0 -131
  80. package/dist/tests/bench/trajectory.js +0 -116
  81. package/dist/tests/bench/trajectory.test.js +0 -127
  82. package/dist/tests/bench/verifier.js +0 -114
  83. package/dist/tests/bench/verifier.test.js +0 -118
  84. package/dist/tests/bench/workflow-evaluator.js +0 -557
  85. package/dist/tests/bench/workflow-evaluator.test.js +0 -421
  86. package/dist/tests/bench/workflow-spec.js +0 -345
  87. package/dist/tests/bench/workflow-spec.test.js +0 -363
  88. package/dist/tests/bench/workflow-trace.js +0 -472
  89. package/dist/tests/bench/workflow-trace.test.js +0 -254
  90. package/dist/tests/benchmark-search-quality.js +0 -536
  91. package/dist/tests/benchmark-suite.js +0 -1441
  92. package/dist/tests/capture-cli.test.js +0 -112
  93. package/dist/tests/cli-errors.test.js +0 -204
  94. package/dist/tests/commands/events.test.js +0 -370
  95. package/dist/tests/commands/history.test.js +0 -418
  96. package/dist/tests/commands/import.test.js +0 -103
  97. package/dist/tests/commands/proposal-cli.test.js +0 -209
  98. package/dist/tests/commands/reflect-propose-cli.test.js +0 -333
  99. package/dist/tests/commands/remember.test.js +0 -97
  100. package/dist/tests/commands/scope-flags.test.js +0 -300
  101. package/dist/tests/commands/search.test.js +0 -537
  102. package/dist/tests/commands/show-indexer-parity.test.js +0 -117
  103. package/dist/tests/commands/show.test.js +0 -294
  104. package/dist/tests/common.test.js +0 -266
  105. package/dist/tests/completions.test.js +0 -142
  106. package/dist/tests/config-cli.test.js +0 -193
  107. package/dist/tests/config-llm-features.test.js +0 -139
  108. package/dist/tests/config.test.js +0 -569
  109. package/dist/tests/contracts/migration-baseline.test.js +0 -43
  110. package/dist/tests/contracts/reflect-propose-envelope.test.js +0 -139
  111. package/dist/tests/contracts/spec-helpers.js +0 -46
  112. package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +0 -228
  113. package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +0 -56
  114. package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +0 -34
  115. package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +0 -94
  116. package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +0 -39
  117. package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +0 -44
  118. package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +0 -47
  119. package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +0 -40
  120. package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +0 -58
  121. package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +0 -34
  122. package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +0 -75
  123. package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +0 -36
  124. package/dist/tests/core/write-source.test.js +0 -366
  125. package/dist/tests/curate-command.test.js +0 -87
  126. package/dist/tests/db-scoring.test.js +0 -201
  127. package/dist/tests/db.test.js +0 -654
  128. package/dist/tests/distill-cli-flag.test.js +0 -208
  129. package/dist/tests/distill.test.js +0 -515
  130. package/dist/tests/docker-install.test.js +0 -120
  131. package/dist/tests/e2e.test.js +0 -1419
  132. package/dist/tests/embedder.test.js +0 -340
  133. package/dist/tests/embedding-model-config.test.js +0 -379
  134. package/dist/tests/feedback-command.test.js +0 -172
  135. package/dist/tests/file-context.test.js +0 -552
  136. package/dist/tests/fixtures/scripts/git/summarize-diff.js +0 -9
  137. package/dist/tests/fixtures/scripts/lint/eslint-check.js +0 -7
  138. package/dist/tests/fixtures/stashes/load.js +0 -166
  139. package/dist/tests/fixtures/stashes/load.test.js +0 -97
  140. package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +0 -12
  141. package/dist/tests/frontmatter.test.js +0 -190
  142. package/dist/tests/fts-field-weighting.test.js +0 -254
  143. package/dist/tests/fuzzy-search.test.js +0 -230
  144. package/dist/tests/git-provider-clone.test.js +0 -45
  145. package/dist/tests/github.test.js +0 -161
  146. package/dist/tests/graph-boost-ranking.test.js +0 -305
  147. package/dist/tests/graph-extraction.test.js +0 -282
  148. package/dist/tests/helpers/usage-events.js +0 -8
  149. package/dist/tests/index-pass-llm.test.js +0 -161
  150. package/dist/tests/indexer.test.js +0 -570
  151. package/dist/tests/info-command.test.js +0 -166
  152. package/dist/tests/init.test.js +0 -69
  153. package/dist/tests/install-script.test.js +0 -246
  154. package/dist/tests/integration/agent-real-profile.test.js +0 -94
  155. package/dist/tests/issue-36-repro.test.js +0 -304
  156. package/dist/tests/issues-191-194.test.js +0 -160
  157. package/dist/tests/lesson-lint.test.js +0 -111
  158. package/dist/tests/llm-client.test.js +0 -115
  159. package/dist/tests/llm-feature-gate.test.js +0 -151
  160. package/dist/tests/llm.test.js +0 -139
  161. package/dist/tests/lockfile.test.js +0 -216
  162. package/dist/tests/manifest.test.js +0 -205
  163. package/dist/tests/markdown.test.js +0 -126
  164. package/dist/tests/matchers-unit.test.js +0 -189
  165. package/dist/tests/memory-inference.test.js +0 -299
  166. package/dist/tests/merge-scoring.test.js +0 -136
  167. package/dist/tests/metadata.test.js +0 -313
  168. package/dist/tests/migration-help.test.js +0 -89
  169. package/dist/tests/origin-resolve.test.js +0 -124
  170. package/dist/tests/output-baseline.test.js +0 -218
  171. package/dist/tests/output-shapes-unit.test.js +0 -478
  172. package/dist/tests/parallel-search.test.js +0 -272
  173. package/dist/tests/parameter-metadata.test.js +0 -365
  174. package/dist/tests/paths.test.js +0 -177
  175. package/dist/tests/progressive-disclosure.test.js +0 -280
  176. package/dist/tests/proposals.test.js +0 -279
  177. package/dist/tests/proposed-quality.test.js +0 -271
  178. package/dist/tests/provider-registry.test.js +0 -32
  179. package/dist/tests/ranking-regression.test.js +0 -548
  180. package/dist/tests/reflect-propose.test.js +0 -455
  181. package/dist/tests/registry-build-index.test.js +0 -394
  182. package/dist/tests/registry-cli.test.js +0 -290
  183. package/dist/tests/registry-index-v2.test.js +0 -430
  184. package/dist/tests/registry-install.test.js +0 -728
  185. package/dist/tests/registry-providers/parity.test.js +0 -189
  186. package/dist/tests/registry-providers/skills-sh.test.js +0 -309
  187. package/dist/tests/registry-providers/static-index.test.js +0 -238
  188. package/dist/tests/registry-resolve.test.js +0 -126
  189. package/dist/tests/registry-search.test.js +0 -923
  190. package/dist/tests/remember-frontmatter.test.js +0 -378
  191. package/dist/tests/remember-unit.test.js +0 -123
  192. package/dist/tests/ripgrep-install.test.js +0 -251
  193. package/dist/tests/ripgrep-resolve.test.js +0 -108
  194. package/dist/tests/ripgrep.test.js +0 -163
  195. package/dist/tests/save-command.test.js +0 -94
  196. package/dist/tests/save-trust-qa-fixes.test.js +0 -270
  197. package/dist/tests/scoring-pipeline.test.js +0 -648
  198. package/dist/tests/search-include-proposed-cli.test.js +0 -118
  199. package/dist/tests/self-update.test.js +0 -442
  200. package/dist/tests/semantic-search-e2e.test.js +0 -512
  201. package/dist/tests/semantic-status.test.js +0 -471
  202. package/dist/tests/setup-run.integration.js +0 -877
  203. package/dist/tests/setup-wizard.test.js +0 -198
  204. package/dist/tests/setup.test.js +0 -131
  205. package/dist/tests/source-add.test.js +0 -11
  206. package/dist/tests/source-clone.test.js +0 -254
  207. package/dist/tests/source-manage.test.js +0 -366
  208. package/dist/tests/source-providers/filesystem.test.js +0 -82
  209. package/dist/tests/source-providers/git.test.js +0 -252
  210. package/dist/tests/source-providers/website.test.js +0 -128
  211. package/dist/tests/source-qa-fixes.test.js +0 -286
  212. package/dist/tests/source-registry.test.js +0 -350
  213. package/dist/tests/source-resolve.test.js +0 -100
  214. package/dist/tests/source-source.test.js +0 -281
  215. package/dist/tests/source.test.js +0 -533
  216. package/dist/tests/tar-utils-scan.test.js +0 -73
  217. package/dist/tests/toggle-components.test.js +0 -73
  218. package/dist/tests/usage-telemetry.test.js +0 -265
  219. package/dist/tests/utility-scoring.test.js +0 -558
  220. package/dist/tests/vault-load-error.test.js +0 -78
  221. package/dist/tests/vault-qa-fixes.test.js +0 -194
  222. package/dist/tests/vault.test.js +0 -429
  223. package/dist/tests/vector-search.test.js +0 -608
  224. package/dist/tests/walker.test.js +0 -252
  225. package/dist/tests/wave2-cluster-bc.test.js +0 -228
  226. package/dist/tests/wave2-cluster-d.test.js +0 -180
  227. package/dist/tests/wave2-cluster-e.test.js +0 -179
  228. package/dist/tests/wiki-qa-fixes.test.js +0 -270
  229. package/dist/tests/wiki.test.js +0 -529
  230. package/dist/tests/workflow-cli.test.js +0 -271
  231. package/dist/tests/workflow-markdown.test.js +0 -171
  232. package/dist/tests/workflow-path-escape.test.js +0 -132
  233. package/dist/tests/workflow-qa-fixes.test.js +0 -395
  234. package/dist/tests/workflows/indexer-rejection.test.js +0 -213
  235. /package/dist/{src/commands → commands}/completions.js +0 -0
  236. /package/dist/{src/commands → commands}/config-cli.js +0 -0
  237. /package/dist/{src/commands → commands}/curate.js +0 -0
  238. /package/dist/{src/commands → commands}/distill.js +0 -0
  239. /package/dist/{src/commands → commands}/events.js +0 -0
  240. /package/dist/{src/commands → commands}/history.js +0 -0
  241. /package/dist/{src/commands → commands}/info.js +0 -0
  242. /package/dist/{src/commands → commands}/init.js +0 -0
  243. /package/dist/{src/commands → commands}/install-audit.js +0 -0
  244. /package/dist/{src/commands → commands}/migration-help.js +0 -0
  245. /package/dist/{src/commands → commands}/proposal.js +0 -0
  246. /package/dist/{src/commands → commands}/propose.js +0 -0
  247. /package/dist/{src/commands → commands}/reflect.js +0 -0
  248. /package/dist/{src/commands → commands}/registry-search.js +0 -0
  249. /package/dist/{src/commands → commands}/remember.js +0 -0
  250. /package/dist/{src/commands → commands}/search.js +0 -0
  251. /package/dist/{src/commands → commands}/self-update.js +0 -0
  252. /package/dist/{src/commands → commands}/show.js +0 -0
  253. /package/dist/{src/commands → commands}/source-clone.js +0 -0
  254. /package/dist/{src/commands → commands}/source-manage.js +0 -0
  255. /package/dist/{src/commands → commands}/vault.js +0 -0
  256. /package/dist/{src/core → core}/asset-ref.js +0 -0
  257. /package/dist/{src/core → core}/asset-registry.js +0 -0
  258. /package/dist/{src/core → core}/asset-spec.js +0 -0
  259. /package/dist/{src/core → core}/errors.js +0 -0
  260. /package/dist/{src/core → core}/events.js +0 -0
  261. /package/dist/{src/core → core}/frontmatter.js +0 -0
  262. /package/dist/{src/core → core}/lesson-lint.js +0 -0
  263. /package/dist/{src/core → core}/markdown.js +0 -0
  264. /package/dist/{src/core → core}/paths.js +0 -0
  265. /package/dist/{src/core → core}/proposals.js +0 -0
  266. /package/dist/{src/core → core}/warn.js +0 -0
  267. /package/dist/{src/core → core}/write-source.js +0 -0
  268. /package/dist/{src/indexer → indexer}/db.js +0 -0
  269. /package/dist/{src/indexer → indexer}/file-context.js +0 -0
  270. /package/dist/{src/indexer → indexer}/graph-boost.js +0 -0
  271. /package/dist/{src/indexer → indexer}/manifest.js +0 -0
  272. /package/dist/{src/indexer → indexer}/matchers.js +0 -0
  273. /package/dist/{src/indexer → indexer}/metadata.js +0 -0
  274. /package/dist/{src/indexer → indexer}/search-fields.js +0 -0
  275. /package/dist/{src/indexer → indexer}/semantic-status.js +0 -0
  276. /package/dist/{src/indexer → indexer}/usage-events.js +0 -0
  277. /package/dist/{src/indexer → indexer}/walker.js +0 -0
  278. /package/dist/{src/integrations → integrations}/agent/config.js +0 -0
  279. /package/dist/{src/integrations → integrations}/agent/detect.js +0 -0
  280. /package/dist/{src/integrations → integrations}/agent/index.js +0 -0
  281. /package/dist/{src/integrations → integrations}/agent/profiles.js +0 -0
  282. /package/dist/{src/integrations → integrations}/agent/prompts.js +0 -0
  283. /package/dist/{src/integrations → integrations}/agent/spawn.js +0 -0
  284. /package/dist/{src/integrations → integrations}/github.js +0 -0
  285. /package/dist/{src/integrations → integrations}/lockfile.js +0 -0
  286. /package/dist/{src/llm → llm}/embedders/cache.js +0 -0
  287. /package/dist/{src/llm → llm}/embedders/types.js +0 -0
  288. /package/dist/{src/llm → llm}/feature-gate.js +0 -0
  289. /package/dist/{src/llm → llm}/index-passes.js +0 -0
  290. /package/dist/{src/output → output}/context.js +0 -0
  291. /package/dist/{src/output → output}/renderers.js +0 -0
  292. /package/dist/{src/output → output}/shapes.js +0 -0
  293. /package/dist/{src/output → output}/text.js +0 -0
  294. /package/dist/{src/registry → registry}/build-index.js +0 -0
  295. /package/dist/{src/registry → registry}/create-provider-registry.js +0 -0
  296. /package/dist/{src/registry → registry}/factory.js +0 -0
  297. /package/dist/{src/registry → registry}/origin-resolve.js +0 -0
  298. /package/dist/{src/registry → registry}/providers/index.js +0 -0
  299. /package/dist/{src/registry → registry}/providers/skills-sh.js +0 -0
  300. /package/dist/{src/registry → registry}/providers/static-index.js +0 -0
  301. /package/dist/{src/registry → registry}/providers/types.js +0 -0
  302. /package/dist/{src/registry → registry}/resolve.js +0 -0
  303. /package/dist/{src/registry → registry}/types.js +0 -0
  304. /package/dist/{src/setup → setup}/detect.js +0 -0
  305. /package/dist/{src/setup → setup}/ripgrep-install.js +0 -0
  306. /package/dist/{src/setup → setup}/ripgrep-resolve.js +0 -0
  307. /package/dist/{src/setup → setup}/steps.js +0 -0
  308. /package/dist/{src/sources → sources}/include.js +0 -0
  309. /package/dist/{src/sources → sources}/provider-factory.js +0 -0
  310. /package/dist/{src/sources → sources}/provider.js +0 -0
  311. /package/dist/{src/sources → sources}/providers/filesystem.js +0 -0
  312. /package/dist/{src/sources → sources}/providers/git.js +0 -0
  313. /package/dist/{src/sources → sources}/providers/index.js +0 -0
  314. /package/dist/{src/sources → sources}/providers/install-types.js +0 -0
  315. /package/dist/{src/sources → sources}/providers/npm.js +0 -0
  316. /package/dist/{src/sources → sources}/providers/provider-utils.js +0 -0
  317. /package/dist/{src/sources → sources}/providers/sync-from-ref.js +0 -0
  318. /package/dist/{src/sources → sources}/providers/tar-utils.js +0 -0
  319. /package/dist/{src/sources → sources}/resolve.js +0 -0
  320. /package/dist/{src/sources → sources}/types.js +0 -0
  321. /package/dist/{src/templates → templates}/wiki-templates.js +0 -0
  322. /package/dist/{src/version.js → version.js} +0 -0
  323. /package/dist/{src/wiki → wiki}/wiki.js +0 -0
  324. /package/dist/{src/workflows → workflows}/authoring.js +0 -0
  325. /package/dist/{src/workflows → workflows}/cli.js +0 -0
  326. /package/dist/{src/workflows → workflows}/db.js +0 -0
  327. /package/dist/{src/workflows → workflows}/document-cache.js +0 -0
  328. /package/dist/{src/workflows → workflows}/parser.js +0 -0
  329. /package/dist/{src/workflows → workflows}/renderer.js +0 -0
  330. /package/dist/{src/workflows → workflows}/runs.js +0 -0
  331. /package/dist/{src/workflows → workflows}/schema.js +0 -0
  332. /package/dist/{src/workflows → workflows}/validator.js +0 -0
@@ -1,401 +0,0 @@
1
- /**
2
- * akm-bench driver — `runOne(options)` executes a single (task, arm, seed)
3
- * triple end-to-end and returns a v1 RunResult envelope.
4
- *
5
- * See `docs/technical/benchmark.md` §5.2 for the locked schema and §7.1/§7.2
6
- * for the isolation/budget rules. The shapes here are the v1 contract that
7
- * #238/#239/#240/#243 will extend without breaking.
8
- *
9
- * Design notes:
10
- * • The driver invokes opencode through `runAgent` with the built-in
11
- * `opencode` profile. No new harness abstraction.
12
- * • Per-run isolation: every run gets fresh tmpdirs for `XDG_CACHE_HOME`,
13
- * `XDG_CONFIG_HOME`, `OPENCODE_CONFIG`, and (when `stashDir` is provided)
14
- * `AKM_STASH_DIR`. The operator's personal opencode/akm config is NEVER
15
- * read or written.
16
- * • Hard budgets: `budgetWallMs` is enforced via `runAgent`'s timeout. A
17
- * timeout produces `outcome: "budget_exceeded"`, which is a distinct
18
- * state from `fail` so cost regressions stay visible.
19
- * • This issue (#236) does not need a real opencode call to work end-to-end.
20
- * The harness shape, isolation, and result envelope must be correct and
21
- * unit-testable with an injected fake spawn.
22
- */
23
- import fs from "node:fs";
24
- import os from "node:os";
25
- import path from "node:path";
26
- import { BUILTIN_AGENT_PROFILE_NAMES, getBuiltinAgentProfile } from "../../src/integrations/agent/profiles";
27
- import { runAgent } from "../../src/integrations/agent/spawn";
28
- import { setupBenchEnvironment } from "./environment";
29
- import { benchMkdtemp } from "./tmp";
30
- import { runVerifier } from "./verifier";
31
/**
 * Environment variable names that carry operator configuration and therefore
 * MUST NOT leak into the per-run child processes.
 */
const ISOLATED_ENV_NAMES = [
    "OPENCODE_CONFIG",
    "AKM_STASH_DIR",
    "XDG_CACHE_HOME",
    "XDG_CONFIG_HOME",
];
33
/**
 * Operator-environment names that are removed from `envSource` before the
 * bench driver hands it to `runAgent`. They belong to the operator's
 * interactive environment, not to a bench-arm child:
 *
 *   • `OPENCODE_API_KEY` / `ANTHROPIC_API_KEY` — real-money credentials. The
 *     opencode profile lists `OPENCODE_API_KEY` in `envPassthrough`, so
 *     without explicit scrubbing the operator's key would be forwarded into
 *     every (task × arm × seed) child. Credentials must come from the bench's
 *     own config surface, never by inheritance.
 *   • `AKM_CONFIG_DIR` — points akm at the operator's stash config. Letting
 *     it through would defeat the per-run isolation tmpdirs
 *     (XDG_CACHE_HOME / XDG_CONFIG_HOME) created by `createIsolationDirs`.
 *
 * Recurrence guard for #271 (same pattern as the #243/#251 fixups: isolation
 * behaviour is pinned with regression tests).
 */
const SCRUBBED_OPERATOR_ENV_NAMES = ["OPENCODE_API_KEY", "ANTHROPIC_API_KEY", "AKM_CONFIG_DIR"];
/**
 * Produce the `envSource` object given to `runAgent`.
 *
 * @param source Environment to start from; `process.env` is used when this
 *   is `null` or `undefined`.
 * @returns A shallow copy of the chosen environment with every name in
 *   `SCRUBBED_OPERATOR_ENV_NAMES` deleted, so profile-level passthrough
 *   cannot carry operator credentials or config-dir hints into the child.
 *   The input object itself is never modified.
 */
export function buildSanitizedEnvSource(source) {
    const sanitized = { ...(source ?? process.env) };
    SCRUBBED_OPERATOR_ENV_NAMES.forEach((scrubbedName) => {
        delete sanitized[scrubbedName];
    });
    return sanitized;
}
71
/**
 * Materialise the directory layout used to isolate a single bench run.
 *
 * Under a fresh root obtained from `benchMkdtemp("akm-bench-run-")` this
 * creates `cache`, `config` and `opencode-config`, then provides
 * `config/opencode` either as a symlink to `~/.config/opencode` (when that
 * directory exists) or as an empty directory.
 *
 * @param stashDir Value returned unchanged as `akmStashDir`.
 * @returns `{ root, cacheHome, configHome, opencodeConfig, akmStashDir }`.
 */
export function createIsolationDirs(stashDir) {
    const root = benchMkdtemp("akm-bench-run-");
    const [cacheHome, configHome, opencodeConfig] = ["cache", "config", "opencode-config"].map((leaf) => path.join(root, leaf));
    for (const dir of [cacheHome, configHome, opencodeConfig]) {
        fs.mkdirSync(dir, { recursive: true });
    }
    // Overriding XDG_CONFIG_HOME would otherwise leave opencode with an empty
    // config dir, and its installed npm provider packages (node_modules, e.g.
    // @ai-sdk/openai-compatible) would fail to load. Linking the real dir in
    // keeps them discoverable; OPENCODE_CONFIG still points at the
    // materialised file, which opencode reads in preference to
    // XDG_CONFIG_HOME/opencode/opencode.json.
    const operatorOpencodeDir = path.join(os.homedir(), ".config", "opencode");
    const isolatedOpencodeDir = path.join(configHome, "opencode");
    if (!fs.existsSync(operatorOpencodeDir)) {
        fs.mkdirSync(isolatedOpencodeDir, { recursive: true });
    }
    else {
        fs.symlinkSync(operatorOpencodeDir, isolatedOpencodeDir);
    }
    return { root, cacheHome, configHome, opencodeConfig, akmStashDir: stashDir };
}
101
/**
 * Assemble the environment overrides passed to `runAgent` for one run.
 *
 * @param dirs Isolation directories (`cacheHome`, `configHome`,
 *   `opencodeConfig`, optional `akmStashDir`).
 * @param model Value stored under `BENCH_OPENCODE_MODEL`.
 * @returns An object pinning `XDG_CACHE_HOME`, `XDG_CONFIG_HOME`,
 *   `OPENCODE_CONFIG` (the `opencode.json` inside `dirs.opencodeConfig`) and
 *   `BENCH_OPENCODE_MODEL`; `AKM_STASH_DIR` is added only when
 *   `dirs.akmStashDir` is truthy.
 */
export function buildIsolatedEnv(dirs, model) {
    const pinned = {
        XDG_CACHE_HOME: dirs.cacheHome,
        XDG_CONFIG_HOME: dirs.configHome,
        OPENCODE_CONFIG: path.join(dirs.opencodeConfig, "opencode.json"),
        BENCH_OPENCODE_MODEL: model,
    };
    if (!dirs.akmStashDir) {
        return pinned;
    }
    pinned.AKM_STASH_DIR = dirs.akmStashDir;
    return pinned;
}
113
/**
 * Remove `AKM_STASH_DIR` from a child environment object.
 *
 * Used by the synthetic-arm spawn path (#261): even when the harness built
 * the env as `{ ...process.env, ...env }`, the operator's real stash dir must
 * not reach the child. Recurrence guard for the #243 fixup pattern — a
 * synthetic-arm child must NEVER inherit a stash.
 *
 * @param env Environment object; modified in place.
 * @returns The same `env` object, to allow chaining.
 */
export function stripAkmStashDir(env) {
    const stashKey = "AKM_STASH_DIR";
    delete env[stashKey];
    return env;
}
126
/**
 * Best-effort token-usage parser for opencode stdout.
 *
 * Returns numeric token counts AND a measurement status so callers can tell
 * a real zero (`"parsed"`) from an unparseable / absent report (`"missing"`,
 * both counts default to 0 and downstream aggregation MUST skip the run
 * rather than treat that 0 as measured).
 *
 * Two spellings are recognised for each of `input` / `output`:
 *   1. A key that itself names tokens, anywhere in the text, e.g.
 *      `input_tokens: 12`, `Input Tokens = 12`, `tokens_input: 12`.
 *   2. A bare key on a line that mentions tokens, e.g.
 *      `tokens: input=1234 output=5678`. The bare form is only accepted on
 *      such a line so unrelated output like `input=5 files` is not counted.
 * Form 1 is tried first, so anything it matches keeps its value.
 *
 * The harness never emits `"unsupported"` from this parser — that label is
 * set by the caller for arms that do not run a token-reporting agent.
 *
 * @param stdout Captured agent stdout.
 * @returns `{ input, output, measurement }` where `measurement` is
 *   `"parsed"` if at least one count was found and `"missing"` otherwise.
 */
export function parseTokenUsage(stdout) {
    const mentionsTokens = /tokens?/i;
    // Returns the first count found for `key`, or null when none is present.
    const countFor = (key) => {
        const labelled = new RegExp(`(?:${key}[_\\s-]?tokens?|tokens?[_\\s-]?${key})[\\s:=]+(\\d+)`, "i");
        const direct = stdout.match(labelled);
        if (direct) {
            return Number.parseInt(direct[1], 10);
        }
        const bare = new RegExp(`\\b${key}[\\s:=]+(\\d+)`, "i");
        for (const line of stdout.split("\n")) {
            if (!mentionsTokens.test(line)) {
                continue;
            }
            const hit = line.match(bare);
            if (hit) {
                return Number.parseInt(hit[1], 10);
            }
        }
        return null;
    };
    const input = countFor("input");
    const output = countFor("output");
    if (input === null && output === null) {
        return { input: 0, output: 0, measurement: "missing" };
    }
    return {
        input: input ?? 0,
        output: output ?? 0,
        measurement: "parsed",
    };
}
152
/**
 * Maximum bytes read from events.jsonl per run. A runaway agent producing
 * GBs of structured-log output would otherwise OOM the bench. Trajectory
 * parsing operates on the prefix; a warning is appended when the cap is
 * hit so the report surfaces the truncation.
 */
export const EVENTS_READ_CAP_BYTES = 16 * 1024 * 1024;
/**
 * Read the events.jsonl file produced by this run, if any. The path is
 * `<cacheHome>/akm/events.jsonl`.
 *
 * At most `EVENTS_READ_CAP_BYTES` (16 MiB) are read. When the file is larger,
 * only the prefix is parsed, the partial trailing line is dropped, and a
 * warning is appended to `opts.warnings` (when supplied).
 *
 * Parsing is best-effort, one JSON document per line:
 *   • blank and malformed lines are skipped;
 *   • lines that parse to something other than a plain object (numbers,
 *     strings, `null`, arrays) are skipped, so they neither become bogus
 *     events nor consume a fallback id;
 *   • an event without an `id` gets the zero-based count of events accepted
 *     before it.
 *
 * This function does not throw on filesystem errors: a missing file yields
 * `[]`, and a file that cannot be read yields `[]` plus a warning in
 * `opts.warnings` (when supplied).
 *
 * @param cacheHome Directory used as `XDG_CACHE_HOME` for the run.
 * @param opts Optional; `opts.warnings` is an array that receives warnings.
 * @returns The parsed event objects, in file order.
 */
export function readRunEvents(cacheHome, opts) {
    const eventsPath = path.join(cacheHome, "akm", "events.jsonl");
    if (!fs.existsSync(eventsPath))
        return [];
    let totalSize = 0;
    try {
        totalSize = fs.statSync(eventsPath).size;
    }
    catch {
        return [];
    }
    const cap = EVENTS_READ_CAP_BYTES;
    const truncated = totalSize > cap;
    let text;
    try {
        if (truncated) {
            // Open and read a bounded prefix instead of `readFileSync`, so an
            // arbitrarily large file is never loaded just to be discarded.
            const buf = Buffer.alloc(cap);
            const fd = fs.openSync(eventsPath, "r");
            let filled = 0;
            try {
                // `readSync` may return fewer bytes than requested; keep going
                // until the cap is reached or EOF (0 bytes) is reported.
                while (filled < cap) {
                    const got = fs.readSync(fd, buf, filled, cap - filled, filled);
                    if (got === 0)
                        break;
                    filled += got;
                }
            }
            finally {
                fs.closeSync(fd);
            }
            // Decode only what was actually read, never the zero padding.
            text = buf.toString("utf8", 0, filled);
            // Drop the partial trailing line so half a record is never parsed.
            const lastNl = text.lastIndexOf("\n");
            if (lastNl !== -1)
                text = text.slice(0, lastNl);
        }
        else {
            text = fs.readFileSync(eventsPath, "utf8");
        }
    }
    catch (err) {
        // The file can vanish or become unreadable after the checks above;
        // events are best-effort, so report it and carry on without them.
        if (opts?.warnings) {
            const reason = err instanceof Error ? err.message : String(err);
            opts.warnings.push(`events.jsonl unreadable: ${reason}; no events loaded.`);
        }
        return [];
    }
    if (truncated && opts?.warnings) {
        opts.warnings.push(`events.jsonl truncated: ${totalSize} bytes exceeds ${cap}-byte cap; trajectory computed from the prefix.`);
    }
    const out = [];
    let id = 0;
    for (const line of text.split("\n")) {
        const trimmed = line.trim();
        if (!trimmed)
            continue;
        let parsed;
        try {
            parsed = JSON.parse(trimmed);
        }
        catch {
            // Skip malformed lines — events stream is best-effort upstream.
            continue;
        }
        // Only plain objects are events; scalars, null and arrays are noise.
        if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed))
            continue;
        out.push({ ...parsed, id: parsed.id ?? id });
        id += 1;
    }
    return out;
}
222
/**
 * Build the prompt forwarded to opencode when the caller omits one.
 *
 * Non-akm arms get a minimal three-line prompt (task id, arm, workspace).
 * The title is deliberately left out so the model has to read the workspace
 * README.md for task specifics; with the title in the prompt it answers from
 * the prompt alone, which breaks tasks whose parameter values (names, IDs)
 * appear only in the workspace files.
 *
 * The akm arm gets explicit, numbered bash steps (search, show, read README,
 * write the answer, record feedback) so the model cannot jump to writing the
 * answer without running the akm CLI first.
 *
 * @param options Reads `arm`, `taskId`, `workspace`, and for the akm arm
 *   also `taskTitle` and `akmKeywords`.
 * @returns The prompt text, lines joined with "\n".
 */
function defaultPrompt(options) {
    const { arm, taskId, workspace } = options;
    const workspaceLine = `Workspace: ${workspace}`;
    if (arm !== "akm") {
        return `Task: ${taskId}\nArm: ${arm}\n${workspaceLine}`;
    }
    const taskLine = options.taskTitle ? `Task: ${taskId}\n${options.taskTitle}` : `Task: ${taskId}`;
    // Search keywords: the explicit field wins; otherwise use the task domain
    // (the part of the id before the first "/") with dashes turned to spaces.
    const keywords = options.akmKeywords ?? taskId.split("/")[0].split("-").join(" ");
    const [leadKeyword] = keywords.split(" ");
    const blank = ``;
    const steps = [
        `You have access to a knowledge stash via the akm CLI tool.`,
        blank,
        `Step 1 — open a terminal and execute this bash command:`,
        ` bash: akm search ${keywords}`,
        blank,
        `Step 2 — from the search results, execute:`,
        ` bash: akm show <ref> (e.g. akm show skill:${leadKeyword})`,
        blank,
        `Step 3 — read README.md in the workspace to understand the specific task requirements:`,
        ` bash: cat ${workspace}/README.md`,
        blank,
        `Step 4 — using the skill content from step 2 and the task requirements from step 3,`,
        `write the answer to ${workspace}/commands.txt`,
        blank,
        `Step 5 — execute:`,
        ` bash: akm feedback <ref> --positive (or --negative)`,
        blank,
        `DO NOT write commands.txt before running steps 1 and 2.`,
        blank,
    ];
    return [...steps, taskLine, workspaceLine].join("\n");
}
263
- /**
264
- * Run a single (task, arm, seed) and return the v1 RunResult envelope.
265
- *
266
- * The function never throws on infrastructure failures — every error path
267
- * is captured into the returned RunResult with a stable outcome value.
268
- */
269
- export async function runOne(options) {
270
- // Stamp a baseline result; we mutate fields below as the run progresses.
271
- const result = {
272
- schemaVersion: 1,
273
- taskId: options.taskId,
274
- arm: options.arm,
275
- seed: options.seed,
276
- model: options.model,
277
- outcome: "harness_error",
278
- tokens: { input: 0, output: 0 },
279
- tokenMeasurement: "missing",
280
- wallclockMs: 0,
281
- trajectory: { correctAssetLoaded: null, feedbackRecorded: null },
282
- events: [],
283
- verifierStdout: "",
284
- verifierExitCode: -1,
285
- assetsLoaded: [],
286
- };
287
- // Look up the built-in opencode profile defensively. The lookup is a pure
288
- // map read today, but wrapping it preserves the doc-comment guarantee that
289
- // runOne never throws on infrastructure failures even if the registry
290
- // shape changes. A missing/throwing profile becomes harness_error.
291
- let profile;
292
- try {
293
- profile = getBuiltinAgentProfile("opencode");
294
- }
295
- catch (err) {
296
- result.verifierStdout = `harness: getBuiltinAgentProfile("opencode") threw: ${err instanceof Error ? err.message : String(err)}`;
297
- return result;
298
- }
299
- if (!profile) {
300
- result.verifierStdout = `harness: built-in agent profile "opencode" missing; available: ${BUILTIN_AGENT_PROFILE_NAMES.join(", ")}`;
301
- return result;
302
- }
303
- // Set up the complete bench environment: isolation dirs, opencode.json
304
- // (with BENCH_OPENCODE_INVARIANTS), akm config.json, and FTS5 index.
305
- // `dryRun: true` when a test-injected spawn is present — the fake stash
306
- // doesn't exist on disk so the akm config and index writes are skipped.
307
- let benchEnv;
308
- try {
309
- benchEnv = setupBenchEnvironment({
310
- model: options.model,
311
- arm: options.arm,
312
- stashDir: options.stashDir,
313
- indexCacheHome: options.indexCacheHome,
314
- providers: options.opencodeProviders,
315
- dryRun: !!options.spawn,
316
- warnings: options.warnings,
317
- });
318
- }
319
- catch (err) {
320
- result.verifierStdout = `harness: environment setup failed: ${err instanceof Error ? err.message : String(err)}`;
321
- return result;
322
- }
323
- const { dirs, env } = benchEnv;
324
- try {
325
- result.startedAt = new Date().toISOString();
326
- const agentResult = await runAgent(profile, options.prompt ?? defaultPrompt(options), {
327
- env,
328
- // #271: scrub operator credentials + config-dir hints from the env
329
- // source BEFORE profile.envPassthrough copies them into the child.
330
- // Without this, OPENCODE_API_KEY (in opencode's passthrough list) and
331
- // AKM_CONFIG_DIR (read by akm at startup) would leak the operator's
332
- // interactive environment into every bench child.
333
- envSource: buildSanitizedEnvSource(),
334
- cwd: options.workspace,
335
- timeoutMs: options.budgetWallMs,
336
- stdio: "captured",
337
- ...(options.spawn ? { spawn: options.spawn } : {}),
338
- });
339
- result.finishedAt = new Date().toISOString();
340
- result.wallclockMs = agentResult.durationMs;
341
- const parsed = parseTokenUsage(agentResult.stdout);
342
- result.tokens = { input: parsed.input, output: parsed.output };
343
- result.tokenMeasurement = parsed.measurement;
344
- result.events = readRunEvents(dirs.cacheHome, { warnings: options.warnings });
345
- if (!agentResult.ok) {
346
- if (agentResult.reason === "timeout") {
347
- result.outcome = "budget_exceeded";
348
- return result;
349
- }
350
- // spawn_failed / non_zero_exit / parse_error all mean the harness
351
- // itself broke; the verifier never saw the workspace.
352
- if (agentResult.reason === "spawn_failed" || agentResult.reason === "parse_error") {
353
- result.outcome = "harness_error";
354
- return result;
355
- }
356
- // non_zero_exit from the agent: intentionally falls through to the
357
- // verifier path. Per spec §5.3 ("deterministic verifiers, never LLM"),
358
- // the agent is the system under test, not the judge — its exit code
359
- // does not gate verification. The verifier always runs against
360
- // whatever workspace state the agent left behind, even on a crash.
361
- }
362
- // Token-budget enforcement is best-effort: only mark `budget_exceeded`
363
- // if measurement was actually parsed (issue #252) AND the total exceeds
364
- // the cap. A `"missing"` / `"unsupported"` measurement MUST NOT silently
365
- // mask a budget overrun as a pass — it leaves the verifier to decide.
366
- if (result.tokenMeasurement === "parsed") {
367
- const totalTokens = result.tokens.input + result.tokens.output;
368
- if (totalTokens > options.budgetTokens) {
369
- result.outcome = "budget_exceeded";
370
- return result;
371
- }
372
- }
373
- const verifierResult = await runVerifier(options.taskDir, options.workspace, options.verifier, {
374
- agentStdout: agentResult.stdout,
375
- expectedMatch: options.expectedMatch,
376
- ...(options.spawn ? { spawn: options.spawn } : {}),
377
- });
378
- result.verifierStdout = verifierResult.stdout;
379
- result.verifierExitCode = verifierResult.exitCode;
380
- if (verifierResult.exitCode === 127) {
381
- // Missing runtime (e.g. pytest not on PATH) — not the agent's fault.
382
- result.outcome = "harness_error";
383
- }
384
- else {
385
- result.outcome = verifierResult.exitCode === 0 ? "pass" : "fail";
386
- }
387
- return result;
388
- }
389
- finally {
390
- // Always tear down the isolation tmpdir. Events are read out before
391
- // deletion (see readRunEvents above), so this is safe.
392
- benchEnv.teardown();
393
- }
394
- }
395
- /** Exposed for the unit test that asserts operator env never leaks. */
396
- export const _ISOLATED_ENV_NAMES = ISOLATED_ENV_NAMES;
397
- /**
398
- * Exposed for the #271 regression test that asserts operator credentials +
399
- * `AKM_CONFIG_DIR` never reach a bench-arm child via profile.envPassthrough.
400
- */
401
- export const _SCRUBBED_OPERATOR_ENV_NAMES = SCRUBBED_OPERATOR_ENV_NAMES;