akm-cli 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (332) hide show
  1. package/CHANGELOG.md +8 -0
  2. package/dist/{src/cli.js → cli.js} +22 -8
  3. package/dist/{src/commands → commands}/installed-stashes.js +1 -1
  4. package/dist/{src/commands → commands}/source-add.js +1 -1
  5. package/dist/{src/core → core}/common.js +16 -1
  6. package/dist/{src/core → core}/config.js +5 -2
  7. package/dist/{src/indexer → indexer}/db-search.js +16 -1
  8. package/dist/{src/indexer → indexer}/graph-extraction.js +5 -3
  9. package/dist/{src/indexer → indexer}/indexer.js +27 -11
  10. package/dist/{src/indexer → indexer}/memory-inference.js +47 -58
  11. package/dist/{src/indexer → indexer}/search-source.js +1 -1
  12. package/dist/{src/llm → llm}/client.js +61 -1
  13. package/dist/{src/llm → llm}/embedder.js +8 -5
  14. package/dist/{src/llm → llm}/embedders/local.js +8 -2
  15. package/dist/{src/llm → llm}/embedders/remote.js +4 -2
  16. package/dist/{src/llm → llm}/graph-extract.js +4 -4
  17. package/dist/llm/memory-infer.js +114 -0
  18. package/dist/{src/llm → llm}/metadata-enhance.js +2 -2
  19. package/dist/{src/output → output}/cli-hints.js +2 -0
  20. package/dist/{src/setup → setup}/setup.js +30 -20
  21. package/dist/sources/providers/website.js +27 -0
  22. package/dist/{src/sources/providers/website.js → sources/website-ingest.js} +38 -51
  23. package/docs/README.md +7 -0
  24. package/docs/migration/release-notes/0.7.0.md +14 -0
  25. package/package.json +11 -8
  26. package/dist/src/llm/memory-infer.js +0 -86
  27. package/dist/tests/add-website-source.test.js +0 -119
  28. package/dist/tests/agent/agent-config-loader.test.js +0 -70
  29. package/dist/tests/agent/agent-config.test.js +0 -221
  30. package/dist/tests/agent/agent-detect.test.js +0 -100
  31. package/dist/tests/agent/agent-spawn.test.js +0 -234
  32. package/dist/tests/agent-output.test.js +0 -186
  33. package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +0 -103
  34. package/dist/tests/architecture/agent-spawn-seam.test.js +0 -193
  35. package/dist/tests/architecture/llm-stateless-seam.test.js +0 -112
  36. package/dist/tests/asset-ref.test.js +0 -192
  37. package/dist/tests/asset-registry.test.js +0 -103
  38. package/dist/tests/asset-spec.test.js +0 -241
  39. package/dist/tests/bench/attribution.test.js +0 -996
  40. package/dist/tests/bench/cleanup-sigint.test.js +0 -83
  41. package/dist/tests/bench/cleanup.js +0 -234
  42. package/dist/tests/bench/cleanup.test.js +0 -166
  43. package/dist/tests/bench/cli.js +0 -1018
  44. package/dist/tests/bench/cli.test.js +0 -445
  45. package/dist/tests/bench/compare.test.js +0 -556
  46. package/dist/tests/bench/corpus.js +0 -317
  47. package/dist/tests/bench/corpus.test.js +0 -258
  48. package/dist/tests/bench/doctor.js +0 -525
  49. package/dist/tests/bench/driver.js +0 -401
  50. package/dist/tests/bench/driver.test.js +0 -584
  51. package/dist/tests/bench/environment.js +0 -233
  52. package/dist/tests/bench/environment.test.js +0 -199
  53. package/dist/tests/bench/evolve-metrics.js +0 -179
  54. package/dist/tests/bench/evolve-metrics.test.js +0 -187
  55. package/dist/tests/bench/evolve.js +0 -647
  56. package/dist/tests/bench/evolve.test.js +0 -624
  57. package/dist/tests/bench/failure-modes.test.js +0 -349
  58. package/dist/tests/bench/feedback-integrity.test.js +0 -457
  59. package/dist/tests/bench/leakage.test.js +0 -228
  60. package/dist/tests/bench/learning-curve.test.js +0 -134
  61. package/dist/tests/bench/metrics.js +0 -2395
  62. package/dist/tests/bench/metrics.test.js +0 -1150
  63. package/dist/tests/bench/no-os-tmpdir-invariant.test.js +0 -43
  64. package/dist/tests/bench/opencode-config.js +0 -194
  65. package/dist/tests/bench/opencode-config.test.js +0 -370
  66. package/dist/tests/bench/report.js +0 -1885
  67. package/dist/tests/bench/report.test.js +0 -1038
  68. package/dist/tests/bench/run-config.js +0 -355
  69. package/dist/tests/bench/run-config.test.js +0 -298
  70. package/dist/tests/bench/run-curate-test.js +0 -32
  71. package/dist/tests/bench/run-failing-tasks.js +0 -56
  72. package/dist/tests/bench/run-full-bench.js +0 -51
  73. package/dist/tests/bench/run-items36-targeted.js +0 -69
  74. package/dist/tests/bench/run-nano-quick.js +0 -42
  75. package/dist/tests/bench/run-waveg-targeted.js +0 -62
  76. package/dist/tests/bench/runner.js +0 -699
  77. package/dist/tests/bench/runner.test.js +0 -958
  78. package/dist/tests/bench/search-bridge.test.js +0 -331
  79. package/dist/tests/bench/tmp.js +0 -131
  80. package/dist/tests/bench/trajectory.js +0 -116
  81. package/dist/tests/bench/trajectory.test.js +0 -127
  82. package/dist/tests/bench/verifier.js +0 -114
  83. package/dist/tests/bench/verifier.test.js +0 -118
  84. package/dist/tests/bench/workflow-evaluator.js +0 -557
  85. package/dist/tests/bench/workflow-evaluator.test.js +0 -421
  86. package/dist/tests/bench/workflow-spec.js +0 -345
  87. package/dist/tests/bench/workflow-spec.test.js +0 -363
  88. package/dist/tests/bench/workflow-trace.js +0 -472
  89. package/dist/tests/bench/workflow-trace.test.js +0 -254
  90. package/dist/tests/benchmark-search-quality.js +0 -536
  91. package/dist/tests/benchmark-suite.js +0 -1441
  92. package/dist/tests/capture-cli.test.js +0 -112
  93. package/dist/tests/cli-errors.test.js +0 -204
  94. package/dist/tests/commands/events.test.js +0 -370
  95. package/dist/tests/commands/history.test.js +0 -418
  96. package/dist/tests/commands/import.test.js +0 -103
  97. package/dist/tests/commands/proposal-cli.test.js +0 -209
  98. package/dist/tests/commands/reflect-propose-cli.test.js +0 -333
  99. package/dist/tests/commands/remember.test.js +0 -97
  100. package/dist/tests/commands/scope-flags.test.js +0 -300
  101. package/dist/tests/commands/search.test.js +0 -537
  102. package/dist/tests/commands/show-indexer-parity.test.js +0 -117
  103. package/dist/tests/commands/show.test.js +0 -294
  104. package/dist/tests/common.test.js +0 -266
  105. package/dist/tests/completions.test.js +0 -142
  106. package/dist/tests/config-cli.test.js +0 -193
  107. package/dist/tests/config-llm-features.test.js +0 -139
  108. package/dist/tests/config.test.js +0 -569
  109. package/dist/tests/contracts/migration-baseline.test.js +0 -43
  110. package/dist/tests/contracts/reflect-propose-envelope.test.js +0 -139
  111. package/dist/tests/contracts/spec-helpers.js +0 -46
  112. package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +0 -228
  113. package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +0 -56
  114. package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +0 -34
  115. package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +0 -94
  116. package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +0 -39
  117. package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +0 -44
  118. package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +0 -47
  119. package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +0 -40
  120. package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +0 -58
  121. package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +0 -34
  122. package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +0 -75
  123. package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +0 -36
  124. package/dist/tests/core/write-source.test.js +0 -366
  125. package/dist/tests/curate-command.test.js +0 -87
  126. package/dist/tests/db-scoring.test.js +0 -201
  127. package/dist/tests/db.test.js +0 -654
  128. package/dist/tests/distill-cli-flag.test.js +0 -208
  129. package/dist/tests/distill.test.js +0 -515
  130. package/dist/tests/docker-install.test.js +0 -120
  131. package/dist/tests/e2e.test.js +0 -1419
  132. package/dist/tests/embedder.test.js +0 -340
  133. package/dist/tests/embedding-model-config.test.js +0 -379
  134. package/dist/tests/feedback-command.test.js +0 -172
  135. package/dist/tests/file-context.test.js +0 -552
  136. package/dist/tests/fixtures/scripts/git/summarize-diff.js +0 -9
  137. package/dist/tests/fixtures/scripts/lint/eslint-check.js +0 -7
  138. package/dist/tests/fixtures/stashes/load.js +0 -166
  139. package/dist/tests/fixtures/stashes/load.test.js +0 -97
  140. package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +0 -12
  141. package/dist/tests/frontmatter.test.js +0 -190
  142. package/dist/tests/fts-field-weighting.test.js +0 -254
  143. package/dist/tests/fuzzy-search.test.js +0 -230
  144. package/dist/tests/git-provider-clone.test.js +0 -45
  145. package/dist/tests/github.test.js +0 -161
  146. package/dist/tests/graph-boost-ranking.test.js +0 -305
  147. package/dist/tests/graph-extraction.test.js +0 -282
  148. package/dist/tests/helpers/usage-events.js +0 -8
  149. package/dist/tests/index-pass-llm.test.js +0 -161
  150. package/dist/tests/indexer.test.js +0 -570
  151. package/dist/tests/info-command.test.js +0 -166
  152. package/dist/tests/init.test.js +0 -69
  153. package/dist/tests/install-script.test.js +0 -246
  154. package/dist/tests/integration/agent-real-profile.test.js +0 -94
  155. package/dist/tests/issue-36-repro.test.js +0 -304
  156. package/dist/tests/issues-191-194.test.js +0 -160
  157. package/dist/tests/lesson-lint.test.js +0 -111
  158. package/dist/tests/llm-client.test.js +0 -115
  159. package/dist/tests/llm-feature-gate.test.js +0 -151
  160. package/dist/tests/llm.test.js +0 -139
  161. package/dist/tests/lockfile.test.js +0 -216
  162. package/dist/tests/manifest.test.js +0 -205
  163. package/dist/tests/markdown.test.js +0 -126
  164. package/dist/tests/matchers-unit.test.js +0 -189
  165. package/dist/tests/memory-inference.test.js +0 -299
  166. package/dist/tests/merge-scoring.test.js +0 -136
  167. package/dist/tests/metadata.test.js +0 -313
  168. package/dist/tests/migration-help.test.js +0 -89
  169. package/dist/tests/origin-resolve.test.js +0 -124
  170. package/dist/tests/output-baseline.test.js +0 -218
  171. package/dist/tests/output-shapes-unit.test.js +0 -478
  172. package/dist/tests/parallel-search.test.js +0 -272
  173. package/dist/tests/parameter-metadata.test.js +0 -365
  174. package/dist/tests/paths.test.js +0 -177
  175. package/dist/tests/progressive-disclosure.test.js +0 -280
  176. package/dist/tests/proposals.test.js +0 -279
  177. package/dist/tests/proposed-quality.test.js +0 -271
  178. package/dist/tests/provider-registry.test.js +0 -32
  179. package/dist/tests/ranking-regression.test.js +0 -548
  180. package/dist/tests/reflect-propose.test.js +0 -455
  181. package/dist/tests/registry-build-index.test.js +0 -394
  182. package/dist/tests/registry-cli.test.js +0 -290
  183. package/dist/tests/registry-index-v2.test.js +0 -430
  184. package/dist/tests/registry-install.test.js +0 -728
  185. package/dist/tests/registry-providers/parity.test.js +0 -189
  186. package/dist/tests/registry-providers/skills-sh.test.js +0 -309
  187. package/dist/tests/registry-providers/static-index.test.js +0 -238
  188. package/dist/tests/registry-resolve.test.js +0 -126
  189. package/dist/tests/registry-search.test.js +0 -923
  190. package/dist/tests/remember-frontmatter.test.js +0 -378
  191. package/dist/tests/remember-unit.test.js +0 -123
  192. package/dist/tests/ripgrep-install.test.js +0 -251
  193. package/dist/tests/ripgrep-resolve.test.js +0 -108
  194. package/dist/tests/ripgrep.test.js +0 -163
  195. package/dist/tests/save-command.test.js +0 -94
  196. package/dist/tests/save-trust-qa-fixes.test.js +0 -270
  197. package/dist/tests/scoring-pipeline.test.js +0 -648
  198. package/dist/tests/search-include-proposed-cli.test.js +0 -118
  199. package/dist/tests/self-update.test.js +0 -442
  200. package/dist/tests/semantic-search-e2e.test.js +0 -512
  201. package/dist/tests/semantic-status.test.js +0 -471
  202. package/dist/tests/setup-run.integration.js +0 -877
  203. package/dist/tests/setup-wizard.test.js +0 -198
  204. package/dist/tests/setup.test.js +0 -131
  205. package/dist/tests/source-add.test.js +0 -11
  206. package/dist/tests/source-clone.test.js +0 -254
  207. package/dist/tests/source-manage.test.js +0 -366
  208. package/dist/tests/source-providers/filesystem.test.js +0 -82
  209. package/dist/tests/source-providers/git.test.js +0 -252
  210. package/dist/tests/source-providers/website.test.js +0 -128
  211. package/dist/tests/source-qa-fixes.test.js +0 -286
  212. package/dist/tests/source-registry.test.js +0 -350
  213. package/dist/tests/source-resolve.test.js +0 -100
  214. package/dist/tests/source-source.test.js +0 -281
  215. package/dist/tests/source.test.js +0 -533
  216. package/dist/tests/tar-utils-scan.test.js +0 -73
  217. package/dist/tests/toggle-components.test.js +0 -73
  218. package/dist/tests/usage-telemetry.test.js +0 -265
  219. package/dist/tests/utility-scoring.test.js +0 -558
  220. package/dist/tests/vault-load-error.test.js +0 -78
  221. package/dist/tests/vault-qa-fixes.test.js +0 -194
  222. package/dist/tests/vault.test.js +0 -429
  223. package/dist/tests/vector-search.test.js +0 -608
  224. package/dist/tests/walker.test.js +0 -252
  225. package/dist/tests/wave2-cluster-bc.test.js +0 -228
  226. package/dist/tests/wave2-cluster-d.test.js +0 -180
  227. package/dist/tests/wave2-cluster-e.test.js +0 -179
  228. package/dist/tests/wiki-qa-fixes.test.js +0 -270
  229. package/dist/tests/wiki.test.js +0 -529
  230. package/dist/tests/workflow-cli.test.js +0 -271
  231. package/dist/tests/workflow-markdown.test.js +0 -171
  232. package/dist/tests/workflow-path-escape.test.js +0 -132
  233. package/dist/tests/workflow-qa-fixes.test.js +0 -395
  234. package/dist/tests/workflows/indexer-rejection.test.js +0 -213
  235. /package/dist/{src/commands → commands}/completions.js +0 -0
  236. /package/dist/{src/commands → commands}/config-cli.js +0 -0
  237. /package/dist/{src/commands → commands}/curate.js +0 -0
  238. /package/dist/{src/commands → commands}/distill.js +0 -0
  239. /package/dist/{src/commands → commands}/events.js +0 -0
  240. /package/dist/{src/commands → commands}/history.js +0 -0
  241. /package/dist/{src/commands → commands}/info.js +0 -0
  242. /package/dist/{src/commands → commands}/init.js +0 -0
  243. /package/dist/{src/commands → commands}/install-audit.js +0 -0
  244. /package/dist/{src/commands → commands}/migration-help.js +0 -0
  245. /package/dist/{src/commands → commands}/proposal.js +0 -0
  246. /package/dist/{src/commands → commands}/propose.js +0 -0
  247. /package/dist/{src/commands → commands}/reflect.js +0 -0
  248. /package/dist/{src/commands → commands}/registry-search.js +0 -0
  249. /package/dist/{src/commands → commands}/remember.js +0 -0
  250. /package/dist/{src/commands → commands}/search.js +0 -0
  251. /package/dist/{src/commands → commands}/self-update.js +0 -0
  252. /package/dist/{src/commands → commands}/show.js +0 -0
  253. /package/dist/{src/commands → commands}/source-clone.js +0 -0
  254. /package/dist/{src/commands → commands}/source-manage.js +0 -0
  255. /package/dist/{src/commands → commands}/vault.js +0 -0
  256. /package/dist/{src/core → core}/asset-ref.js +0 -0
  257. /package/dist/{src/core → core}/asset-registry.js +0 -0
  258. /package/dist/{src/core → core}/asset-spec.js +0 -0
  259. /package/dist/{src/core → core}/errors.js +0 -0
  260. /package/dist/{src/core → core}/events.js +0 -0
  261. /package/dist/{src/core → core}/frontmatter.js +0 -0
  262. /package/dist/{src/core → core}/lesson-lint.js +0 -0
  263. /package/dist/{src/core → core}/markdown.js +0 -0
  264. /package/dist/{src/core → core}/paths.js +0 -0
  265. /package/dist/{src/core → core}/proposals.js +0 -0
  266. /package/dist/{src/core → core}/warn.js +0 -0
  267. /package/dist/{src/core → core}/write-source.js +0 -0
  268. /package/dist/{src/indexer → indexer}/db.js +0 -0
  269. /package/dist/{src/indexer → indexer}/file-context.js +0 -0
  270. /package/dist/{src/indexer → indexer}/graph-boost.js +0 -0
  271. /package/dist/{src/indexer → indexer}/manifest.js +0 -0
  272. /package/dist/{src/indexer → indexer}/matchers.js +0 -0
  273. /package/dist/{src/indexer → indexer}/metadata.js +0 -0
  274. /package/dist/{src/indexer → indexer}/search-fields.js +0 -0
  275. /package/dist/{src/indexer → indexer}/semantic-status.js +0 -0
  276. /package/dist/{src/indexer → indexer}/usage-events.js +0 -0
  277. /package/dist/{src/indexer → indexer}/walker.js +0 -0
  278. /package/dist/{src/integrations → integrations}/agent/config.js +0 -0
  279. /package/dist/{src/integrations → integrations}/agent/detect.js +0 -0
  280. /package/dist/{src/integrations → integrations}/agent/index.js +0 -0
  281. /package/dist/{src/integrations → integrations}/agent/profiles.js +0 -0
  282. /package/dist/{src/integrations → integrations}/agent/prompts.js +0 -0
  283. /package/dist/{src/integrations → integrations}/agent/spawn.js +0 -0
  284. /package/dist/{src/integrations → integrations}/github.js +0 -0
  285. /package/dist/{src/integrations → integrations}/lockfile.js +0 -0
  286. /package/dist/{src/llm → llm}/embedders/cache.js +0 -0
  287. /package/dist/{src/llm → llm}/embedders/types.js +0 -0
  288. /package/dist/{src/llm → llm}/feature-gate.js +0 -0
  289. /package/dist/{src/llm → llm}/index-passes.js +0 -0
  290. /package/dist/{src/output → output}/context.js +0 -0
  291. /package/dist/{src/output → output}/renderers.js +0 -0
  292. /package/dist/{src/output → output}/shapes.js +0 -0
  293. /package/dist/{src/output → output}/text.js +0 -0
  294. /package/dist/{src/registry → registry}/build-index.js +0 -0
  295. /package/dist/{src/registry → registry}/create-provider-registry.js +0 -0
  296. /package/dist/{src/registry → registry}/factory.js +0 -0
  297. /package/dist/{src/registry → registry}/origin-resolve.js +0 -0
  298. /package/dist/{src/registry → registry}/providers/index.js +0 -0
  299. /package/dist/{src/registry → registry}/providers/skills-sh.js +0 -0
  300. /package/dist/{src/registry → registry}/providers/static-index.js +0 -0
  301. /package/dist/{src/registry → registry}/providers/types.js +0 -0
  302. /package/dist/{src/registry → registry}/resolve.js +0 -0
  303. /package/dist/{src/registry → registry}/types.js +0 -0
  304. /package/dist/{src/setup → setup}/detect.js +0 -0
  305. /package/dist/{src/setup → setup}/ripgrep-install.js +0 -0
  306. /package/dist/{src/setup → setup}/ripgrep-resolve.js +0 -0
  307. /package/dist/{src/setup → setup}/steps.js +0 -0
  308. /package/dist/{src/sources → sources}/include.js +0 -0
  309. /package/dist/{src/sources → sources}/provider-factory.js +0 -0
  310. /package/dist/{src/sources → sources}/provider.js +0 -0
  311. /package/dist/{src/sources → sources}/providers/filesystem.js +0 -0
  312. /package/dist/{src/sources → sources}/providers/git.js +0 -0
  313. /package/dist/{src/sources → sources}/providers/index.js +0 -0
  314. /package/dist/{src/sources → sources}/providers/install-types.js +0 -0
  315. /package/dist/{src/sources → sources}/providers/npm.js +0 -0
  316. /package/dist/{src/sources → sources}/providers/provider-utils.js +0 -0
  317. /package/dist/{src/sources → sources}/providers/sync-from-ref.js +0 -0
  318. /package/dist/{src/sources → sources}/providers/tar-utils.js +0 -0
  319. /package/dist/{src/sources → sources}/resolve.js +0 -0
  320. /package/dist/{src/sources → sources}/types.js +0 -0
  321. /package/dist/{src/templates → templates}/wiki-templates.js +0 -0
  322. /package/dist/{src/version.js → version.js} +0 -0
  323. /package/dist/{src/wiki → wiki}/wiki.js +0 -0
  324. /package/dist/{src/workflows → workflows}/authoring.js +0 -0
  325. /package/dist/{src/workflows → workflows}/cli.js +0 -0
  326. /package/dist/{src/workflows → workflows}/db.js +0 -0
  327. /package/dist/{src/workflows → workflows}/document-cache.js +0 -0
  328. /package/dist/{src/workflows → workflows}/parser.js +0 -0
  329. /package/dist/{src/workflows → workflows}/renderer.js +0 -0
  330. /package/dist/{src/workflows → workflows}/runs.js +0 -0
  331. /package/dist/{src/workflows → workflows}/schema.js +0 -0
  332. /package/dist/{src/workflows → workflows}/validator.js +0 -0
@@ -1,584 +0,0 @@
1
- /**
2
- * Unit tests for the bench driver — exercises every RunResult outcome
3
- * (`pass`, `fail`, `budget_exceeded`, `harness_error`) via an injected fake
4
- * spawn. Real opencode is never invoked.
5
- */
6
- import { afterAll, beforeAll, describe, expect, test } from "bun:test";
7
- import fs from "node:fs";
8
- import path from "node:path";
9
- import { _ISOLATED_ENV_NAMES, _SCRUBBED_OPERATOR_ENV_NAMES, buildIsolatedEnv, buildSanitizedEnvSource, createIsolationDirs, EVENTS_READ_CAP_BYTES, parseTokenUsage, readRunEvents, runOne, stripAkmStashDir, } from "./driver";
10
- import { benchMkdtemp } from "./tmp";
11
- function asReadableStream(text) {
12
- const bytes = new TextEncoder().encode(text);
13
- return new ReadableStream({
14
- start(controller) {
15
- controller.enqueue(bytes);
16
- controller.close();
17
- },
18
- });
19
- }
20
- /**
21
- * Build a spawn fn that scripts the agent run first, then any subsequent
22
- * verifier run. Distinguishes by command: opencode is the configured `bin`
23
- * for the built-in opencode profile (i.e. cmd[0] === "opencode"); anything
24
- * else is a verifier.
25
- */
26
- function scriptedSpawn(agent, verifier) {
27
- const invocations = [];
28
- const spawn = (cmd, options) => {
29
- invocations.push({ cmd, env: options.env });
30
- const isAgent = cmd[0] === "opencode";
31
- const config = isAgent ? agent : (verifier ?? { exitCode: 0, stdout: "" });
32
- if (isAgent && agent.throwSync)
33
- throw agent.throwSync;
34
- let resolveExit = () => { };
35
- const exited = new Promise((resolve) => {
36
- resolveExit = resolve;
37
- if (!(isAgent && agent.hangsUntilKilled))
38
- resolve(config.exitCode);
39
- });
40
- const proc = {
41
- exitCode: isAgent && agent.hangsUntilKilled ? null : config.exitCode,
42
- exited,
43
- stdout: asReadableStream(config.stdout ?? ""),
44
- stderr: asReadableStream(config.stderr ?? ""),
45
- stdin: null,
46
- kill() {
47
- // Honour kill so timeout path resolves cleanly.
48
- resolveExit(143);
49
- },
50
- };
51
- return proc;
52
- };
53
- return { spawn, invocations };
54
- }
55
- const baseOptions = {
56
- track: "utility",
57
- arm: "noakm",
58
- taskId: "_example/example-task",
59
- workspace: "",
60
- model: "anthropic/claude-opus-4-7",
61
- seed: 0,
62
- budgetTokens: 100000,
63
- budgetWallMs: 60_000,
64
- verifier: "regex",
65
- taskDir: "",
66
- expectedMatch: "ok",
67
- };
68
- describe("runOne", () => {
69
- let workspace;
70
- beforeAll(() => {
71
- workspace = benchMkdtemp("bench-driver-test-");
72
- });
73
- afterAll(() => {
74
- fs.rmSync(workspace, { recursive: true, force: true });
75
- });
76
- test("pass: agent exits 0, verifier exits 0", async () => {
77
- const { spawn, invocations } = scriptedSpawn({ exitCode: 0, stdout: "ok" });
78
- const result = await runOne({ ...baseOptions, workspace, spawn });
79
- expect(result.outcome).toBe("pass");
80
- expect(result.verifierExitCode).toBe(0);
81
- expect(result.taskId).toBe("_example/example-task");
82
- expect(result.model).toBe("anthropic/claude-opus-4-7");
83
- expect(result.seed).toBe(0);
84
- expect(result.schemaVersion).toBe(1);
85
- expect(invocations[0]?.cmd[0]).toBe("opencode");
86
- });
87
- test("fail: agent exits 0 but verifier rejects output", async () => {
88
- const { spawn } = scriptedSpawn({ exitCode: 0, stdout: "nope" });
89
- const result = await runOne({ ...baseOptions, workspace, spawn });
90
- expect(result.outcome).toBe("fail");
91
- expect(result.verifierExitCode).toBe(1);
92
- });
93
- test("budget_exceeded: agent times out (runAgent reason: timeout)", async () => {
94
- const { spawn } = scriptedSpawn({ exitCode: 0, hangsUntilKilled: true });
95
- const result = await runOne({
96
- ...baseOptions,
97
- workspace,
98
- spawn,
99
- // Tiny budget so the timer fires before the fake agent ever exits.
100
- budgetWallMs: 50,
101
- });
102
- expect(result.outcome).toBe("budget_exceeded");
103
- });
104
- test("harness_error: agent spawn throws synchronously", async () => {
105
- const { spawn } = scriptedSpawn({ exitCode: 0, throwSync: new Error("ENOENT") });
106
- const result = await runOne({ ...baseOptions, workspace, spawn });
107
- expect(result.outcome).toBe("harness_error");
108
- });
109
- test("budget_exceeded: parsed token usage exceeds budgetTokens", async () => {
110
- // Agent reports 70k input + 50k output = 120k tokens, budget is 100k.
111
- // Verifier should NOT run; outcome must be budget_exceeded.
112
- const { spawn } = scriptedSpawn({
113
- exitCode: 0,
114
- stdout: "input_tokens: 70000 output_tokens: 50000",
115
- });
116
- const result = await runOne({
117
- ...baseOptions,
118
- workspace,
119
- spawn,
120
- budgetTokens: 100_000,
121
- });
122
- expect(result.outcome).toBe("budget_exceeded");
123
- expect(result.tokens.input + result.tokens.output).toBeGreaterThan(100_000);
124
- expect(result.tokens.input).toBe(70_000);
125
- expect(result.tokens.output).toBe(50_000);
126
- expect(result.tokenMeasurement).toBe("parsed");
127
- });
128
- test("tokenMeasurement: parsed when stdout reports tokens", async () => {
129
- const { spawn } = scriptedSpawn({
130
- exitCode: 0,
131
- stdout: "ok\ninput_tokens: 10 output_tokens: 5",
132
- });
133
- const result = await runOne({ ...baseOptions, workspace, spawn });
134
- expect(result.outcome).toBe("pass");
135
- expect(result.tokenMeasurement).toBe("parsed");
136
- expect(result.tokens.input).toBe(10);
137
- expect(result.tokens.output).toBe(5);
138
- });
139
- test("tokenMeasurement: missing when stdout has no token line — and budget is NOT enforced", async () => {
140
- // Agent never reports tokens. budgetTokens is 1, but the harness must not
141
- // mark this as budget_exceeded (issue #252) — measurement is missing.
142
- const { spawn } = scriptedSpawn({ exitCode: 0, stdout: "ok" });
143
- const result = await runOne({ ...baseOptions, workspace, spawn, budgetTokens: 1 });
144
- expect(result.tokenMeasurement).toBe("missing");
145
- expect(result.tokens).toEqual({ input: 0, output: 0 });
146
- expect(result.outcome).not.toBe("budget_exceeded");
147
- });
148
- test("tokenMeasurement: harness_error path leaves measurement as 'missing'", async () => {
149
- const { spawn } = scriptedSpawn({ exitCode: 0, throwSync: new Error("ENOENT") });
150
- const result = await runOne({ ...baseOptions, workspace, spawn });
151
- expect(result.outcome).toBe("harness_error");
152
- // No agent stdout was ever observed → measurement stays at the default.
153
- expect(result.tokenMeasurement).toBe("missing");
154
- });
155
- test("isolation: child env carries pinned XDG/OPENCODE/AKM dirs and not operator values", async () => {
156
- const sentinel = "/tmp/operator-config-must-not-leak";
157
- const priors = {};
158
- for (const name of _ISOLATED_ENV_NAMES) {
159
- priors[name] = process.env[name];
160
- process.env[name] = sentinel;
161
- }
162
- try {
163
- const { spawn, invocations } = scriptedSpawn({ exitCode: 0, stdout: "ok" });
164
- await runOne({
165
- ...baseOptions,
166
- workspace,
167
- stashDir: "/tmp/some-stash",
168
- arm: "akm",
169
- spawn,
170
- });
171
- const childEnv = invocations[0]?.env ?? {};
172
- // Each isolated key MUST be present and MUST NOT equal the operator sentinel.
173
- for (const name of _ISOLATED_ENV_NAMES) {
174
- expect(childEnv[name]).toBeDefined();
175
- expect(childEnv[name]).not.toBe(sentinel);
176
- }
177
- expect(childEnv.AKM_STASH_DIR).toBe("/tmp/some-stash");
178
- expect(childEnv.BENCH_OPENCODE_MODEL).toBe("anthropic/claude-opus-4-7");
179
- }
180
- finally {
181
- for (const name of _ISOLATED_ENV_NAMES) {
182
- if (priors[name] === undefined)
183
- delete process.env[name];
184
- else
185
- process.env[name] = priors[name];
186
- }
187
- }
188
- });
189
- // ── #271: operator-env isolation (OPENCODE_API_KEY/ANTHROPIC_API_KEY/AKM_CONFIG_DIR)
190
- test("operator env isolation: bench child never inherits OPENCODE_API_KEY/ANTHROPIC_API_KEY/AKM_CONFIG_DIR (#271)", async () => {
191
- // Even though `OPENCODE_API_KEY` is in the opencode profile's
192
- // `envPassthrough` list, the bench driver MUST scrub these operator-env
193
- // names before profile.envPassthrough copies them into the child. This
194
- // is the regression guard the #271 review identified — without it,
195
- // operator credentials and the operator's `AKM_CONFIG_DIR` would leak
196
- // into every (task × arm × seed) child.
197
- const sentinels = {
198
- OPENCODE_API_KEY: "sentinel-A-must-not-leak",
199
- ANTHROPIC_API_KEY: "sentinel-B-must-not-leak",
200
- AKM_CONFIG_DIR: "sentinel-C-must-not-leak",
201
- };
202
- const priors = {};
203
- for (const [name, value] of Object.entries(sentinels)) {
204
- priors[name] = process.env[name];
205
- process.env[name] = value;
206
- }
207
- try {
208
- const { spawn, invocations } = scriptedSpawn({ exitCode: 0, stdout: "ok" });
209
- await runOne({
210
- ...baseOptions,
211
- workspace,
212
- arm: "akm",
213
- stashDir: "/tmp/some-stash",
214
- spawn,
215
- });
216
- const childEnv = invocations[0]?.env ?? {};
217
- // None of the operator sentinels reach the child env that runAgent
218
- // hands to spawn — neither as a key:value pair nor as a substring
219
- // match (paranoid: confirm the literal sentinel strings are absent
220
- // even from values like `OPENCODE_CONFIG`).
221
- for (const name of _SCRUBBED_OPERATOR_ENV_NAMES) {
222
- expect(childEnv[name]).toBeUndefined();
223
- }
224
- for (const sentinel of Object.values(sentinels)) {
225
- for (const value of Object.values(childEnv)) {
226
- expect(value).not.toContain(sentinel);
227
- }
228
- }
229
- // The explicit bench keys ARE present and pinned to the per-run
230
- // tmpdirs (sanity: the scrubbing didn't accidentally drop them).
231
- expect(childEnv.XDG_CACHE_HOME).toBeDefined();
232
- expect(childEnv.XDG_CONFIG_HOME).toBeDefined();
233
- expect(childEnv.OPENCODE_CONFIG).toBeDefined();
234
- expect(childEnv.AKM_STASH_DIR).toBe("/tmp/some-stash");
235
- expect(childEnv.BENCH_OPENCODE_MODEL).toBe("anthropic/claude-opus-4-7");
236
- }
237
- finally {
238
- for (const [name, prior] of Object.entries(priors)) {
239
- if (prior === undefined)
240
- delete process.env[name];
241
- else
242
- process.env[name] = prior;
243
- }
244
- }
245
- });
246
- // ── #261: synthetic-arm AKM_STASH_DIR isolation ─────────────────────────────
247
- test("synthetic arm: child env never carries AKM_STASH_DIR (recurrence guard for #243 fixup)", async () => {
248
- // CRITICAL: synthetic-arm runs MUST NOT carry AKM_STASH_DIR. Without
249
- // this guard the operator's real AKM_STASH_DIR leaks in via parent-env
250
- // inheritance — exactly the failure mode the #243 fixup chased. We
251
- // exercise both the explicit-stashDir case (bad caller passes one
252
- // anyway) and the no-stashDir case.
253
- const operatorStash = "/tmp/operator-stash-must-never-leak-into-synthetic";
254
- const prior = process.env.AKM_STASH_DIR;
255
- process.env.AKM_STASH_DIR = operatorStash;
256
- try {
257
- // 1) Synthetic arm with NO stashDir option: AKM_STASH_DIR must be
258
- // absent in the child env.
259
- const { spawn, invocations } = scriptedSpawn({ exitCode: 0, stdout: "ok" });
260
- await runOne({
261
- ...baseOptions,
262
- workspace,
263
- arm: "synthetic",
264
- spawn,
265
- });
266
- const childEnv1 = invocations[0]?.env ?? {};
267
- expect(childEnv1.AKM_STASH_DIR).toBeUndefined();
268
- expect(childEnv1.AKM_STASH_DIR).not.toBe(operatorStash);
269
- // 2) Even when a buggy caller forwards a stashDir to the synthetic
270
- // arm, the driver MUST refuse to wire it into the child env.
271
- const { spawn: spawn2, invocations: invocations2 } = scriptedSpawn({ exitCode: 0, stdout: "ok" });
272
- await runOne({
273
- ...baseOptions,
274
- workspace,
275
- arm: "synthetic",
276
- stashDir: "/tmp/buggy-caller-stash",
277
- spawn: spawn2,
278
- });
279
- const childEnv2 = invocations2[0]?.env ?? {};
280
- expect(childEnv2.AKM_STASH_DIR).toBeUndefined();
281
- }
282
- finally {
283
- if (prior === undefined)
284
- delete process.env.AKM_STASH_DIR;
285
- else
286
- process.env.AKM_STASH_DIR = prior;
287
- }
288
- });
289
- // ── opencodeProviders: materialise tests ──────────────────────────────────
290
- test("runOne with opencodeProviders writes opencode.json into OPENCODE_CONFIG before spawn", async () => {
291
- // We need to capture the OPENCODE_CONFIG path from the child env to
292
- // check the file was written. We do this by saving it from the spawn
293
- // invocation then checking AFTER the run returns (before dir teardown
294
- // occurs — note: driver tears down dirs in finally; but we copy the path
295
- // from the invocation). Actually: dirs are torn down in the driver's
296
- // finally block AFTER runAgent returns, so by the time our fake spawn
297
- // is called the file SHOULD be present. We check via a closure.
298
- let capturedOpencodeCfgDir;
299
- let fileExistedAtSpawnTime = false;
300
- const checkingSpawn = (cmd, options) => {
301
- // Capture the OPENCODE_CONFIG dir from the child env.
302
- const env = options.env;
303
- if (env?.OPENCODE_CONFIG) {
304
- capturedOpencodeCfgDir = env.OPENCODE_CONFIG;
305
- // OPENCODE_CONFIG now points directly to the opencode.json file.
306
- fileExistedAtSpawnTime = require("node:fs").existsSync(env.OPENCODE_CONFIG);
307
- }
308
- // Behave like the normal fake (agent exits 0, stdout = "ok").
309
- const { spawn: inner } = scriptedSpawn({ exitCode: 0, stdout: "ok" });
310
- return inner(cmd, options);
311
- };
312
- const fakeProviders = {
313
- source: "/fake/providers.json",
314
- providers: {
315
- testprov: {
316
- npm: "@ai-sdk/openai-compatible",
317
- options: { baseURL: "http://localhost:9999/v1" },
318
- },
319
- },
320
- defaultModel: "testprov/my-model",
321
- };
322
- const result = await runOne({
323
- ...baseOptions,
324
- workspace,
325
- model: "testprov/my-model",
326
- spawn: checkingSpawn,
327
- opencodeProviders: fakeProviders,
328
- });
329
- // The run should succeed or fail on the verifier — the key thing is it
330
- // is not harness_error from the provider materialise step.
331
- expect(result.outcome).not.toBe("harness_error");
332
- // The file MUST have existed at spawn time.
333
- expect(fileExistedAtSpawnTime).toBe(true);
334
- // Regression: the OPENCODE_CONFIG dir is torn down after the run.
335
- if (capturedOpencodeCfgDir) {
336
- // Dir should be cleaned up by the driver's finally block.
337
- // (We can't assert it's gone because the test itself runs in the same
338
- // process; just verify the captured path was non-empty.)
339
- expect(capturedOpencodeCfgDir.length).toBeGreaterThan(0);
340
- }
341
- });
342
- test("runOne WITHOUT opencodeProviders writes minimal stub to OPENCODE_CONFIG (regression guard)", async () => {
343
- let capturedDir;
344
- let filesAtSpawnTime = [];
345
- let stubContent;
346
- const checkingSpawn = (cmd, options) => {
347
- const env = options.env;
348
- if (env?.OPENCODE_CONFIG) {
349
- capturedDir = env.OPENCODE_CONFIG;
350
- try {
351
- // OPENCODE_CONFIG points to the file, so read it directly.
352
- stubContent = require("node:fs").readFileSync(env.OPENCODE_CONFIG, "utf8");
353
- filesAtSpawnTime = ["opencode.json"];
354
- }
355
- catch {
356
- filesAtSpawnTime = [];
357
- }
358
- }
359
- const { spawn: inner } = scriptedSpawn({ exitCode: 0, stdout: "ok" });
360
- return inner(cmd, options);
361
- };
362
- await runOne({
363
- ...baseOptions,
364
- workspace,
365
- spawn: checkingSpawn,
366
- // No opencodeProviders
367
- });
368
- expect(capturedDir).toBeDefined();
369
- // Without opencodeProviders, the driver writes a minimal stub opencode.json.
370
- expect(filesAtSpawnTime).toEqual(["opencode.json"]);
371
- expect(stubContent).toBeDefined();
372
- const parsed = JSON.parse(stubContent ?? "{}");
373
- expect(parsed.$schema).toBe("https://opencode.ai/config.json");
374
- expect(parsed.provider).toBeUndefined();
375
- });
376
- test("runOne falls back to model-only stub when provider prefix not in map (cloud/built-in models)", async () => {
377
- // "opencode" is a BUILTIN_CLOUD_PREFIX — not in fakeProviders — should write
378
- // a model-only stub and proceed rather than returning harness_error. Built-in
379
- // cloud models like "opencode/big-pickle" resolve via opencode's own registry
380
- // and do NOT need a custom provider entry.
381
- const fakeProviders = {
382
- source: "/fake/providers.json",
383
- providers: { myprov: {} },
384
- };
385
- let stubContent;
386
- const checkingSpawn = (cmd, options) => {
387
- const env = options.env;
388
- if (env?.OPENCODE_CONFIG) {
389
- try {
390
- stubContent = require("node:fs").readFileSync(env.OPENCODE_CONFIG, "utf8");
391
- }
392
- catch {
393
- /* file may not exist */
394
- }
395
- }
396
- const { spawn: inner } = scriptedSpawn({ exitCode: 0, stdout: "ok" });
397
- return inner(cmd, options);
398
- };
399
- const result = await runOne({
400
- ...baseOptions,
401
- workspace,
402
- model: "opencode/big-pickle",
403
- spawn: checkingSpawn,
404
- opencodeProviders: fakeProviders,
405
- });
406
- // Should NOT be harness_error — built-in cloud prefix falls back to stub.
407
- expect(result.outcome).not.toBe("harness_error");
408
- // The written stub should have model key but no provider block.
409
- expect(stubContent).toBeDefined();
410
- const written = JSON.parse(stubContent ?? "{}");
411
- expect(written.model).toBe("opencode/big-pickle");
412
- expect(written.provider).toBeUndefined();
413
- });
414
- test("harness_error: custom provider prefix without opencodeProviders refuses to run", async () => {
415
- // "shredder/qwen/qwen3.5-9b" has a custom prefix. Without opencodeProviders,
416
- // opencode would silently fall back to a cloud model and burn API credits.
417
- // The harness must refuse to run rather than allow that.
418
- const { spawn } = scriptedSpawn({ exitCode: 0, stdout: "ok" });
419
- const result = await runOne({
420
- ...baseOptions,
421
- workspace,
422
- model: "shredder/qwen/qwen3.5-9b",
423
- spawn,
424
- // opencodeProviders deliberately omitted
425
- });
426
- expect(result.outcome).toBe("harness_error");
427
- // Error surfaces via setupBenchEnvironment (may be wrapped in "environment setup failed")
428
- expect(result.verifierStdout).toMatch(/custom provider prefix|environment setup failed/);
429
- });
430
- });
431
- describe("driver helpers", () => {
432
- test("createIsolationDirs creates four dirs under a single root", () => {
433
- const dirs = createIsolationDirs();
434
- try {
435
- expect(fs.existsSync(dirs.cacheHome)).toBe(true);
436
- expect(fs.existsSync(dirs.configHome)).toBe(true);
437
- expect(fs.existsSync(dirs.opencodeConfig)).toBe(true);
438
- expect(dirs.cacheHome.startsWith(dirs.root)).toBe(true);
439
- }
440
- finally {
441
- fs.rmSync(dirs.root, { recursive: true, force: true });
442
- }
443
- });
444
- test("stripAkmStashDir deletes AKM_STASH_DIR in place (#261 synthetic-arm guard)", () => {
445
- const env = {
446
- AKM_STASH_DIR: "/tmp/operator-stash",
447
- XDG_CACHE_HOME: "/tmp/cache",
448
- };
449
- const result = stripAkmStashDir(env);
450
- expect(result).toBe(env); // mutates in place + returns same ref
451
- expect(env.AKM_STASH_DIR).toBeUndefined();
452
- expect(env.XDG_CACHE_HOME).toBe("/tmp/cache"); // siblings untouched
453
- // No-op on env without AKM_STASH_DIR.
454
- const env2 = { XDG_CACHE_HOME: "/tmp/cache" };
455
- stripAkmStashDir(env2);
456
- expect(env2).toEqual({ XDG_CACHE_HOME: "/tmp/cache" });
457
- });
458
- test("buildSanitizedEnvSource strips OPENCODE_API_KEY/ANTHROPIC_API_KEY/AKM_CONFIG_DIR (#271)", () => {
459
- const source = {
460
- OPENCODE_API_KEY: "leak-A",
461
- ANTHROPIC_API_KEY: "leak-B",
462
- AKM_CONFIG_DIR: "/operator/akm",
463
- PATH: "/usr/bin",
464
- HOME: "/home/op",
465
- OPENCODE_CONFIG: "/operator/opencode",
466
- UNRELATED: "kept",
467
- };
468
- const result = buildSanitizedEnvSource(source);
469
- // Operator names removed.
470
- expect(result.OPENCODE_API_KEY).toBeUndefined();
471
- expect(result.ANTHROPIC_API_KEY).toBeUndefined();
472
- expect(result.AKM_CONFIG_DIR).toBeUndefined();
473
- // Everything else preserved verbatim.
474
- expect(result.PATH).toBe("/usr/bin");
475
- expect(result.HOME).toBe("/home/op");
476
- expect(result.OPENCODE_CONFIG).toBe("/operator/opencode");
477
- expect(result.UNRELATED).toBe("kept");
478
- // Result is a copy, not the same reference (caller can mutate freely).
479
- expect(result).not.toBe(source);
480
- // Source object is untouched.
481
- expect(source.OPENCODE_API_KEY).toBe("leak-A");
482
- });
483
- test("buildSanitizedEnvSource defaults to process.env when no source given", () => {
484
- const prior = process.env.OPENCODE_API_KEY;
485
- process.env.OPENCODE_API_KEY = "default-source-leak";
486
- try {
487
- const result = buildSanitizedEnvSource();
488
- expect(result.OPENCODE_API_KEY).toBeUndefined();
489
- }
490
- finally {
491
- if (prior === undefined)
492
- delete process.env.OPENCODE_API_KEY;
493
- else
494
- process.env.OPENCODE_API_KEY = prior;
495
- }
496
- });
497
- test("buildIsolatedEnv pins the four isolation keys plus model", () => {
498
- const dirs = createIsolationDirs("/tmp/stash");
499
- try {
500
- const env = buildIsolatedEnv(dirs, "model-x");
501
- expect(env.XDG_CACHE_HOME).toBe(dirs.cacheHome);
502
- expect(env.XDG_CONFIG_HOME).toBe(dirs.configHome);
503
- expect(env.OPENCODE_CONFIG).toBe(path.join(dirs.opencodeConfig, "opencode.json"));
504
- expect(env.AKM_STASH_DIR).toBe("/tmp/stash");
505
- expect(env.BENCH_OPENCODE_MODEL).toBe("model-x");
506
- }
507
- finally {
508
- fs.rmSync(dirs.root, { recursive: true, force: true });
509
- }
510
- });
511
- test("parseTokenUsage extracts numbers when present, missing otherwise", () => {
512
- // No matchable token line at all → measurement is "missing", not a real zero (issue #252).
513
- expect(parseTokenUsage("")).toEqual({ input: 0, output: 0, measurement: "missing" });
514
- expect(parseTokenUsage("noise")).toEqual({ input: 0, output: 0, measurement: "missing" });
515
- // Both keys present → "parsed" with the actual numbers.
516
- expect(parseTokenUsage("input_tokens: 123 output_tokens: 456")).toEqual({
517
- input: 123,
518
- output: 456,
519
- measurement: "parsed",
520
- });
521
- // Only one key present → still "parsed", missing key defaults to 0.
522
- expect(parseTokenUsage("input_tokens: 99")).toEqual({ input: 99, output: 0, measurement: "parsed" });
523
- expect(parseTokenUsage("output_tokens: 55")).toEqual({ input: 0, output: 55, measurement: "parsed" });
524
- });
525
- test("readRunEvents returns [] when events.jsonl is missing and parses lines when present", () => {
526
- const tmp = benchMkdtemp("bench-events-");
527
- try {
528
- expect(readRunEvents(tmp)).toEqual([]);
529
- const akm = path.join(tmp, "akm");
530
- fs.mkdirSync(akm, { recursive: true });
531
- fs.writeFileSync(path.join(akm, "events.jsonl"), `${JSON.stringify({ schemaVersion: 1, ts: "2026-04-27T00:00:00Z", eventType: "feedback" })}\n`);
532
- const events = readRunEvents(tmp);
533
- expect(events.length).toBe(1);
534
- expect(events[0]?.eventType).toBe("feedback");
535
- }
536
- finally {
537
- fs.rmSync(tmp, { recursive: true, force: true });
538
- }
539
- });
540
- test("readRunEvents caps reads at EVENTS_READ_CAP_BYTES and records a warning when exceeded", () => {
541
- const tmp = benchMkdtemp("bench-events-cap-");
542
- try {
543
- const akm = path.join(tmp, "akm");
544
- fs.mkdirSync(akm, { recursive: true });
545
- const eventsPath = path.join(akm, "events.jsonl");
546
- // Write a leading parseable record, then a giant filler line that
547
- // pushes total size past the cap.
548
- const firstLine = `${JSON.stringify({ schemaVersion: 1, ts: "2026-04-27T00:00:00Z", eventType: "feedback" })}\n`;
549
- const fd = fs.openSync(eventsPath, "w");
550
- try {
551
- fs.writeSync(fd, firstLine);
552
- // Filler line: a single very long line that — combined with the
553
- // first — exceeds the cap. We cap at 16MiB so write 17MiB of 'x'.
554
- const fillerSize = EVENTS_READ_CAP_BYTES + 1024 * 1024;
555
- const chunk = Buffer.alloc(64 * 1024, "x".charCodeAt(0));
556
- let written = 0;
557
- while (written < fillerSize) {
558
- const remaining = fillerSize - written;
559
- const toWrite = remaining < chunk.length ? chunk.subarray(0, remaining) : chunk;
560
- fs.writeSync(fd, toWrite);
561
- written += toWrite.length;
562
- }
563
- fs.writeSync(fd, "\n");
564
- }
565
- finally {
566
- fs.closeSync(fd);
567
- }
568
- const totalSize = fs.statSync(eventsPath).size;
569
- expect(totalSize).toBeGreaterThan(EVENTS_READ_CAP_BYTES);
570
- const warnings = [];
571
- const events = readRunEvents(tmp, { warnings });
572
- // The first parseable record should still be returned from the prefix.
573
- expect(events.length).toBe(1);
574
- expect(events[0]?.eventType).toBe("feedback");
575
- // A warning was appended that mentions the cap and the actual size.
576
- expect(warnings.length).toBe(1);
577
- expect(warnings[0]).toContain("events.jsonl truncated");
578
- expect(warnings[0]).toContain(String(EVENTS_READ_CAP_BYTES));
579
- }
580
- finally {
581
- fs.rmSync(tmp, { recursive: true, force: true });
582
- }
583
- });
584
- });