akm-cli 0.7.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (332) hide show
  1. package/CHANGELOG.md +8 -0
  2. package/dist/{src/cli.js → cli.js} +22 -8
  3. package/dist/{src/commands → commands}/installed-stashes.js +1 -1
  4. package/dist/{src/commands → commands}/source-add.js +1 -1
  5. package/dist/{src/core → core}/common.js +16 -1
  6. package/dist/{src/core → core}/config.js +5 -2
  7. package/dist/{src/indexer → indexer}/db-search.js +16 -1
  8. package/dist/{src/indexer → indexer}/graph-extraction.js +5 -3
  9. package/dist/{src/indexer → indexer}/indexer.js +27 -11
  10. package/dist/{src/indexer → indexer}/memory-inference.js +47 -58
  11. package/dist/{src/indexer → indexer}/search-source.js +1 -1
  12. package/dist/{src/llm → llm}/client.js +61 -1
  13. package/dist/{src/llm → llm}/embedder.js +8 -5
  14. package/dist/{src/llm → llm}/embedders/local.js +8 -2
  15. package/dist/{src/llm → llm}/embedders/remote.js +4 -2
  16. package/dist/{src/llm → llm}/graph-extract.js +4 -4
  17. package/dist/llm/memory-infer.js +114 -0
  18. package/dist/{src/llm → llm}/metadata-enhance.js +2 -2
  19. package/dist/{src/output → output}/cli-hints.js +2 -0
  20. package/dist/{src/setup → setup}/setup.js +30 -20
  21. package/dist/sources/providers/website.js +27 -0
  22. package/dist/{src/sources/providers/website.js → sources/website-ingest.js} +38 -51
  23. package/docs/README.md +7 -0
  24. package/docs/migration/release-notes/0.7.0.md +14 -0
  25. package/package.json +11 -8
  26. package/dist/src/llm/memory-infer.js +0 -86
  27. package/dist/tests/add-website-source.test.js +0 -119
  28. package/dist/tests/agent/agent-config-loader.test.js +0 -70
  29. package/dist/tests/agent/agent-config.test.js +0 -221
  30. package/dist/tests/agent/agent-detect.test.js +0 -100
  31. package/dist/tests/agent/agent-spawn.test.js +0 -234
  32. package/dist/tests/agent-output.test.js +0 -186
  33. package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +0 -103
  34. package/dist/tests/architecture/agent-spawn-seam.test.js +0 -193
  35. package/dist/tests/architecture/llm-stateless-seam.test.js +0 -112
  36. package/dist/tests/asset-ref.test.js +0 -192
  37. package/dist/tests/asset-registry.test.js +0 -103
  38. package/dist/tests/asset-spec.test.js +0 -241
  39. package/dist/tests/bench/attribution.test.js +0 -996
  40. package/dist/tests/bench/cleanup-sigint.test.js +0 -83
  41. package/dist/tests/bench/cleanup.js +0 -234
  42. package/dist/tests/bench/cleanup.test.js +0 -166
  43. package/dist/tests/bench/cli.js +0 -1018
  44. package/dist/tests/bench/cli.test.js +0 -445
  45. package/dist/tests/bench/compare.test.js +0 -556
  46. package/dist/tests/bench/corpus.js +0 -317
  47. package/dist/tests/bench/corpus.test.js +0 -258
  48. package/dist/tests/bench/doctor.js +0 -525
  49. package/dist/tests/bench/driver.js +0 -401
  50. package/dist/tests/bench/driver.test.js +0 -584
  51. package/dist/tests/bench/environment.js +0 -233
  52. package/dist/tests/bench/environment.test.js +0 -199
  53. package/dist/tests/bench/evolve-metrics.js +0 -179
  54. package/dist/tests/bench/evolve-metrics.test.js +0 -187
  55. package/dist/tests/bench/evolve.js +0 -647
  56. package/dist/tests/bench/evolve.test.js +0 -624
  57. package/dist/tests/bench/failure-modes.test.js +0 -349
  58. package/dist/tests/bench/feedback-integrity.test.js +0 -457
  59. package/dist/tests/bench/leakage.test.js +0 -228
  60. package/dist/tests/bench/learning-curve.test.js +0 -134
  61. package/dist/tests/bench/metrics.js +0 -2395
  62. package/dist/tests/bench/metrics.test.js +0 -1150
  63. package/dist/tests/bench/no-os-tmpdir-invariant.test.js +0 -43
  64. package/dist/tests/bench/opencode-config.js +0 -194
  65. package/dist/tests/bench/opencode-config.test.js +0 -370
  66. package/dist/tests/bench/report.js +0 -1885
  67. package/dist/tests/bench/report.test.js +0 -1038
  68. package/dist/tests/bench/run-config.js +0 -355
  69. package/dist/tests/bench/run-config.test.js +0 -298
  70. package/dist/tests/bench/run-curate-test.js +0 -32
  71. package/dist/tests/bench/run-failing-tasks.js +0 -56
  72. package/dist/tests/bench/run-full-bench.js +0 -51
  73. package/dist/tests/bench/run-items36-targeted.js +0 -69
  74. package/dist/tests/bench/run-nano-quick.js +0 -42
  75. package/dist/tests/bench/run-waveg-targeted.js +0 -62
  76. package/dist/tests/bench/runner.js +0 -699
  77. package/dist/tests/bench/runner.test.js +0 -958
  78. package/dist/tests/bench/search-bridge.test.js +0 -331
  79. package/dist/tests/bench/tmp.js +0 -131
  80. package/dist/tests/bench/trajectory.js +0 -116
  81. package/dist/tests/bench/trajectory.test.js +0 -127
  82. package/dist/tests/bench/verifier.js +0 -114
  83. package/dist/tests/bench/verifier.test.js +0 -118
  84. package/dist/tests/bench/workflow-evaluator.js +0 -557
  85. package/dist/tests/bench/workflow-evaluator.test.js +0 -421
  86. package/dist/tests/bench/workflow-spec.js +0 -345
  87. package/dist/tests/bench/workflow-spec.test.js +0 -363
  88. package/dist/tests/bench/workflow-trace.js +0 -472
  89. package/dist/tests/bench/workflow-trace.test.js +0 -254
  90. package/dist/tests/benchmark-search-quality.js +0 -536
  91. package/dist/tests/benchmark-suite.js +0 -1441
  92. package/dist/tests/capture-cli.test.js +0 -112
  93. package/dist/tests/cli-errors.test.js +0 -204
  94. package/dist/tests/commands/events.test.js +0 -370
  95. package/dist/tests/commands/history.test.js +0 -418
  96. package/dist/tests/commands/import.test.js +0 -103
  97. package/dist/tests/commands/proposal-cli.test.js +0 -209
  98. package/dist/tests/commands/reflect-propose-cli.test.js +0 -333
  99. package/dist/tests/commands/remember.test.js +0 -97
  100. package/dist/tests/commands/scope-flags.test.js +0 -300
  101. package/dist/tests/commands/search.test.js +0 -537
  102. package/dist/tests/commands/show-indexer-parity.test.js +0 -117
  103. package/dist/tests/commands/show.test.js +0 -294
  104. package/dist/tests/common.test.js +0 -266
  105. package/dist/tests/completions.test.js +0 -142
  106. package/dist/tests/config-cli.test.js +0 -193
  107. package/dist/tests/config-llm-features.test.js +0 -139
  108. package/dist/tests/config.test.js +0 -569
  109. package/dist/tests/contracts/migration-baseline.test.js +0 -43
  110. package/dist/tests/contracts/reflect-propose-envelope.test.js +0 -139
  111. package/dist/tests/contracts/spec-helpers.js +0 -46
  112. package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +0 -228
  113. package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +0 -56
  114. package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +0 -34
  115. package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +0 -94
  116. package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +0 -39
  117. package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +0 -44
  118. package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +0 -47
  119. package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +0 -40
  120. package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +0 -58
  121. package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +0 -34
  122. package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +0 -75
  123. package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +0 -36
  124. package/dist/tests/core/write-source.test.js +0 -366
  125. package/dist/tests/curate-command.test.js +0 -87
  126. package/dist/tests/db-scoring.test.js +0 -201
  127. package/dist/tests/db.test.js +0 -654
  128. package/dist/tests/distill-cli-flag.test.js +0 -208
  129. package/dist/tests/distill.test.js +0 -515
  130. package/dist/tests/docker-install.test.js +0 -120
  131. package/dist/tests/e2e.test.js +0 -1419
  132. package/dist/tests/embedder.test.js +0 -340
  133. package/dist/tests/embedding-model-config.test.js +0 -379
  134. package/dist/tests/feedback-command.test.js +0 -172
  135. package/dist/tests/file-context.test.js +0 -552
  136. package/dist/tests/fixtures/scripts/git/summarize-diff.js +0 -9
  137. package/dist/tests/fixtures/scripts/lint/eslint-check.js +0 -7
  138. package/dist/tests/fixtures/stashes/load.js +0 -166
  139. package/dist/tests/fixtures/stashes/load.test.js +0 -97
  140. package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +0 -12
  141. package/dist/tests/frontmatter.test.js +0 -190
  142. package/dist/tests/fts-field-weighting.test.js +0 -254
  143. package/dist/tests/fuzzy-search.test.js +0 -230
  144. package/dist/tests/git-provider-clone.test.js +0 -45
  145. package/dist/tests/github.test.js +0 -161
  146. package/dist/tests/graph-boost-ranking.test.js +0 -305
  147. package/dist/tests/graph-extraction.test.js +0 -282
  148. package/dist/tests/helpers/usage-events.js +0 -8
  149. package/dist/tests/index-pass-llm.test.js +0 -161
  150. package/dist/tests/indexer.test.js +0 -570
  151. package/dist/tests/info-command.test.js +0 -166
  152. package/dist/tests/init.test.js +0 -69
  153. package/dist/tests/install-script.test.js +0 -246
  154. package/dist/tests/integration/agent-real-profile.test.js +0 -94
  155. package/dist/tests/issue-36-repro.test.js +0 -304
  156. package/dist/tests/issues-191-194.test.js +0 -160
  157. package/dist/tests/lesson-lint.test.js +0 -111
  158. package/dist/tests/llm-client.test.js +0 -115
  159. package/dist/tests/llm-feature-gate.test.js +0 -151
  160. package/dist/tests/llm.test.js +0 -139
  161. package/dist/tests/lockfile.test.js +0 -216
  162. package/dist/tests/manifest.test.js +0 -205
  163. package/dist/tests/markdown.test.js +0 -126
  164. package/dist/tests/matchers-unit.test.js +0 -189
  165. package/dist/tests/memory-inference.test.js +0 -299
  166. package/dist/tests/merge-scoring.test.js +0 -136
  167. package/dist/tests/metadata.test.js +0 -313
  168. package/dist/tests/migration-help.test.js +0 -89
  169. package/dist/tests/origin-resolve.test.js +0 -124
  170. package/dist/tests/output-baseline.test.js +0 -218
  171. package/dist/tests/output-shapes-unit.test.js +0 -478
  172. package/dist/tests/parallel-search.test.js +0 -272
  173. package/dist/tests/parameter-metadata.test.js +0 -365
  174. package/dist/tests/paths.test.js +0 -177
  175. package/dist/tests/progressive-disclosure.test.js +0 -280
  176. package/dist/tests/proposals.test.js +0 -279
  177. package/dist/tests/proposed-quality.test.js +0 -271
  178. package/dist/tests/provider-registry.test.js +0 -32
  179. package/dist/tests/ranking-regression.test.js +0 -548
  180. package/dist/tests/reflect-propose.test.js +0 -455
  181. package/dist/tests/registry-build-index.test.js +0 -394
  182. package/dist/tests/registry-cli.test.js +0 -290
  183. package/dist/tests/registry-index-v2.test.js +0 -430
  184. package/dist/tests/registry-install.test.js +0 -728
  185. package/dist/tests/registry-providers/parity.test.js +0 -189
  186. package/dist/tests/registry-providers/skills-sh.test.js +0 -309
  187. package/dist/tests/registry-providers/static-index.test.js +0 -238
  188. package/dist/tests/registry-resolve.test.js +0 -126
  189. package/dist/tests/registry-search.test.js +0 -923
  190. package/dist/tests/remember-frontmatter.test.js +0 -378
  191. package/dist/tests/remember-unit.test.js +0 -123
  192. package/dist/tests/ripgrep-install.test.js +0 -251
  193. package/dist/tests/ripgrep-resolve.test.js +0 -108
  194. package/dist/tests/ripgrep.test.js +0 -163
  195. package/dist/tests/save-command.test.js +0 -94
  196. package/dist/tests/save-trust-qa-fixes.test.js +0 -270
  197. package/dist/tests/scoring-pipeline.test.js +0 -648
  198. package/dist/tests/search-include-proposed-cli.test.js +0 -118
  199. package/dist/tests/self-update.test.js +0 -442
  200. package/dist/tests/semantic-search-e2e.test.js +0 -512
  201. package/dist/tests/semantic-status.test.js +0 -471
  202. package/dist/tests/setup-run.integration.js +0 -877
  203. package/dist/tests/setup-wizard.test.js +0 -198
  204. package/dist/tests/setup.test.js +0 -131
  205. package/dist/tests/source-add.test.js +0 -11
  206. package/dist/tests/source-clone.test.js +0 -254
  207. package/dist/tests/source-manage.test.js +0 -366
  208. package/dist/tests/source-providers/filesystem.test.js +0 -82
  209. package/dist/tests/source-providers/git.test.js +0 -252
  210. package/dist/tests/source-providers/website.test.js +0 -128
  211. package/dist/tests/source-qa-fixes.test.js +0 -286
  212. package/dist/tests/source-registry.test.js +0 -350
  213. package/dist/tests/source-resolve.test.js +0 -100
  214. package/dist/tests/source-source.test.js +0 -281
  215. package/dist/tests/source.test.js +0 -533
  216. package/dist/tests/tar-utils-scan.test.js +0 -73
  217. package/dist/tests/toggle-components.test.js +0 -73
  218. package/dist/tests/usage-telemetry.test.js +0 -265
  219. package/dist/tests/utility-scoring.test.js +0 -558
  220. package/dist/tests/vault-load-error.test.js +0 -78
  221. package/dist/tests/vault-qa-fixes.test.js +0 -194
  222. package/dist/tests/vault.test.js +0 -429
  223. package/dist/tests/vector-search.test.js +0 -608
  224. package/dist/tests/walker.test.js +0 -252
  225. package/dist/tests/wave2-cluster-bc.test.js +0 -228
  226. package/dist/tests/wave2-cluster-d.test.js +0 -180
  227. package/dist/tests/wave2-cluster-e.test.js +0 -179
  228. package/dist/tests/wiki-qa-fixes.test.js +0 -270
  229. package/dist/tests/wiki.test.js +0 -529
  230. package/dist/tests/workflow-cli.test.js +0 -271
  231. package/dist/tests/workflow-markdown.test.js +0 -171
  232. package/dist/tests/workflow-path-escape.test.js +0 -132
  233. package/dist/tests/workflow-qa-fixes.test.js +0 -395
  234. package/dist/tests/workflows/indexer-rejection.test.js +0 -213
  235. /package/dist/{src/commands → commands}/completions.js +0 -0
  236. /package/dist/{src/commands → commands}/config-cli.js +0 -0
  237. /package/dist/{src/commands → commands}/curate.js +0 -0
  238. /package/dist/{src/commands → commands}/distill.js +0 -0
  239. /package/dist/{src/commands → commands}/events.js +0 -0
  240. /package/dist/{src/commands → commands}/history.js +0 -0
  241. /package/dist/{src/commands → commands}/info.js +0 -0
  242. /package/dist/{src/commands → commands}/init.js +0 -0
  243. /package/dist/{src/commands → commands}/install-audit.js +0 -0
  244. /package/dist/{src/commands → commands}/migration-help.js +0 -0
  245. /package/dist/{src/commands → commands}/proposal.js +0 -0
  246. /package/dist/{src/commands → commands}/propose.js +0 -0
  247. /package/dist/{src/commands → commands}/reflect.js +0 -0
  248. /package/dist/{src/commands → commands}/registry-search.js +0 -0
  249. /package/dist/{src/commands → commands}/remember.js +0 -0
  250. /package/dist/{src/commands → commands}/search.js +0 -0
  251. /package/dist/{src/commands → commands}/self-update.js +0 -0
  252. /package/dist/{src/commands → commands}/show.js +0 -0
  253. /package/dist/{src/commands → commands}/source-clone.js +0 -0
  254. /package/dist/{src/commands → commands}/source-manage.js +0 -0
  255. /package/dist/{src/commands → commands}/vault.js +0 -0
  256. /package/dist/{src/core → core}/asset-ref.js +0 -0
  257. /package/dist/{src/core → core}/asset-registry.js +0 -0
  258. /package/dist/{src/core → core}/asset-spec.js +0 -0
  259. /package/dist/{src/core → core}/errors.js +0 -0
  260. /package/dist/{src/core → core}/events.js +0 -0
  261. /package/dist/{src/core → core}/frontmatter.js +0 -0
  262. /package/dist/{src/core → core}/lesson-lint.js +0 -0
  263. /package/dist/{src/core → core}/markdown.js +0 -0
  264. /package/dist/{src/core → core}/paths.js +0 -0
  265. /package/dist/{src/core → core}/proposals.js +0 -0
  266. /package/dist/{src/core → core}/warn.js +0 -0
  267. /package/dist/{src/core → core}/write-source.js +0 -0
  268. /package/dist/{src/indexer → indexer}/db.js +0 -0
  269. /package/dist/{src/indexer → indexer}/file-context.js +0 -0
  270. /package/dist/{src/indexer → indexer}/graph-boost.js +0 -0
  271. /package/dist/{src/indexer → indexer}/manifest.js +0 -0
  272. /package/dist/{src/indexer → indexer}/matchers.js +0 -0
  273. /package/dist/{src/indexer → indexer}/metadata.js +0 -0
  274. /package/dist/{src/indexer → indexer}/search-fields.js +0 -0
  275. /package/dist/{src/indexer → indexer}/semantic-status.js +0 -0
  276. /package/dist/{src/indexer → indexer}/usage-events.js +0 -0
  277. /package/dist/{src/indexer → indexer}/walker.js +0 -0
  278. /package/dist/{src/integrations → integrations}/agent/config.js +0 -0
  279. /package/dist/{src/integrations → integrations}/agent/detect.js +0 -0
  280. /package/dist/{src/integrations → integrations}/agent/index.js +0 -0
  281. /package/dist/{src/integrations → integrations}/agent/profiles.js +0 -0
  282. /package/dist/{src/integrations → integrations}/agent/prompts.js +0 -0
  283. /package/dist/{src/integrations → integrations}/agent/spawn.js +0 -0
  284. /package/dist/{src/integrations → integrations}/github.js +0 -0
  285. /package/dist/{src/integrations → integrations}/lockfile.js +0 -0
  286. /package/dist/{src/llm → llm}/embedders/cache.js +0 -0
  287. /package/dist/{src/llm → llm}/embedders/types.js +0 -0
  288. /package/dist/{src/llm → llm}/feature-gate.js +0 -0
  289. /package/dist/{src/llm → llm}/index-passes.js +0 -0
  290. /package/dist/{src/output → output}/context.js +0 -0
  291. /package/dist/{src/output → output}/renderers.js +0 -0
  292. /package/dist/{src/output → output}/shapes.js +0 -0
  293. /package/dist/{src/output → output}/text.js +0 -0
  294. /package/dist/{src/registry → registry}/build-index.js +0 -0
  295. /package/dist/{src/registry → registry}/create-provider-registry.js +0 -0
  296. /package/dist/{src/registry → registry}/factory.js +0 -0
  297. /package/dist/{src/registry → registry}/origin-resolve.js +0 -0
  298. /package/dist/{src/registry → registry}/providers/index.js +0 -0
  299. /package/dist/{src/registry → registry}/providers/skills-sh.js +0 -0
  300. /package/dist/{src/registry → registry}/providers/static-index.js +0 -0
  301. /package/dist/{src/registry → registry}/providers/types.js +0 -0
  302. /package/dist/{src/registry → registry}/resolve.js +0 -0
  303. /package/dist/{src/registry → registry}/types.js +0 -0
  304. /package/dist/{src/setup → setup}/detect.js +0 -0
  305. /package/dist/{src/setup → setup}/ripgrep-install.js +0 -0
  306. /package/dist/{src/setup → setup}/ripgrep-resolve.js +0 -0
  307. /package/dist/{src/setup → setup}/steps.js +0 -0
  308. /package/dist/{src/sources → sources}/include.js +0 -0
  309. /package/dist/{src/sources → sources}/provider-factory.js +0 -0
  310. /package/dist/{src/sources → sources}/provider.js +0 -0
  311. /package/dist/{src/sources → sources}/providers/filesystem.js +0 -0
  312. /package/dist/{src/sources → sources}/providers/git.js +0 -0
  313. /package/dist/{src/sources → sources}/providers/index.js +0 -0
  314. /package/dist/{src/sources → sources}/providers/install-types.js +0 -0
  315. /package/dist/{src/sources → sources}/providers/npm.js +0 -0
  316. /package/dist/{src/sources → sources}/providers/provider-utils.js +0 -0
  317. /package/dist/{src/sources → sources}/providers/sync-from-ref.js +0 -0
  318. /package/dist/{src/sources → sources}/providers/tar-utils.js +0 -0
  319. /package/dist/{src/sources → sources}/resolve.js +0 -0
  320. /package/dist/{src/sources → sources}/types.js +0 -0
  321. /package/dist/{src/templates → templates}/wiki-templates.js +0 -0
  322. /package/dist/{src/version.js → version.js} +0 -0
  323. /package/dist/{src/wiki → wiki}/wiki.js +0 -0
  324. /package/dist/{src/workflows → workflows}/authoring.js +0 -0
  325. /package/dist/{src/workflows → workflows}/cli.js +0 -0
  326. /package/dist/{src/workflows → workflows}/db.js +0 -0
  327. /package/dist/{src/workflows → workflows}/document-cache.js +0 -0
  328. /package/dist/{src/workflows → workflows}/parser.js +0 -0
  329. /package/dist/{src/workflows → workflows}/renderer.js +0 -0
  330. /package/dist/{src/workflows → workflows}/runs.js +0 -0
  331. /package/dist/{src/workflows → workflows}/schema.js +0 -0
  332. /package/dist/{src/workflows → workflows}/validator.js +0 -0
@@ -1,233 +0,0 @@
1
- /**
2
- * environment.ts — unified bench environment setup.
3
- *
4
- * `setupBenchEnvironment` is the single function that owns all per-run
5
- * isolation: isolation dirs, opencode.json, akm config, FTS5 index. Both
6
- * `runOne` (driver.ts) and the doctor's live-run check call this function,
7
- * guaranteeing they produce identical environments.
8
- *
9
- * Key design decisions:
10
- * - `BENCH_OPENCODE_INVARIANTS` (plugin:[], permission block) are always
11
- * written — they are bench isolation invariants, not conditional on the
12
- * provider path. No silent stub fallbacks.
13
- * - `dryRun: true` skips the akm config and index writes. Unit tests set
14
- * this so the setup path is exercised without spawning a real agent.
15
- * - `validateFixtureCorpus` is called at bench startup to catch missing
16
- * fixtures before any work items start, not per-task mid-run.
17
- */
18
- import fs from "node:fs";
19
- import path from "node:path";
20
- import { buildIsolatedEnv, buildSanitizedEnvSource, createIsolationDirs } from "./driver";
21
- import { BenchConfigError, selectProviderForModel } from "./opencode-config";
22
- import { benchMkdtemp } from "./tmp";
23
- // ── Bench isolation invariants ───────────────────────────────────────────────
24
- /**
25
- * Top-level keys written unconditionally into every bench-generated
26
- * opencode.json. These are isolation invariants — never conditional on
27
- * provider resolution or model type.
28
- *
29
- * - `plugin: []` — prevents operator plugins (akm-opencode, etc.) from
30
- * running lifecycle hooks that override AKM_STASH_DIR, warm indexes
31
- * against the wrong stash, or prompt akm setup wizards.
32
- * - `permission` — opencode in non-interactive (`opencode run`) mode
33
- * silently skips tool calls without explicit permission grants.
34
- */
35
- export const BENCH_OPENCODE_INVARIANTS = {
36
- plugin: [],
37
- permission: {
38
- bash: "allow",
39
- edit: "allow",
40
- write: "allow",
41
- read: "allow",
42
- webfetch: "allow",
43
- },
44
- };
45
- // ── Built-in cloud prefixes ──────────────────────────────────────────────────
46
- /**
47
- * opencode provider prefixes that resolve via its built-in cloud-provider
48
- * registry. Models with one of these prefixes do not need a custom provider
49
- * entry in the bench providers JSON. Models with any other prefix require
50
- * `opencodeProviders` — the harness refuses to run without it to prevent
51
- * silent cloud-model fallback and unexpected API charges.
52
- */
53
- export const BUILTIN_CLOUD_PREFIXES = new Set([
54
- "anthropic",
55
- "openai",
56
- "openrouter",
57
- "opencode",
58
- "google",
59
- "amazon",
60
- "azure",
61
- "vertex",
62
- "bedrock",
63
- "mistral",
64
- "groq",
65
- "together",
66
- "fireworks",
67
- ]);
68
- /**
69
- * Write an `opencode.json` into `opencodeConfigDir`.
70
- *
71
- * Always includes `BENCH_OPENCODE_INVARIANTS` (plugin:[], permission block).
72
- * When `providers` is supplied and the model prefix resolves, the `provider`
73
- * block is added. When the prefix is not found in the providers map (built-in
74
- * cloud model), the file is written without a provider block and a warning is
75
- * returned — this is not an error because built-in cloud models resolve via
76
- * opencode's own registry.
77
- *
78
- * Returns a `WriteOpencodeJsonResult` — never throws for expected cases.
79
- * Throws for unexpected FS errors.
80
- */
81
- export function writeOpencodeJson(opencodeConfigDir, model, providers) {
82
- const warnings = [];
83
- let providerKey;
84
- let providerBlock;
85
- if (providers) {
86
- try {
87
- const selected = selectProviderForModel(providers, model);
88
- providerKey = selected.providerKey;
89
- providerBlock = { [selected.providerKey]: selected.entry };
90
- }
91
- catch (err) {
92
- if (err instanceof BenchConfigError) {
93
- // Check if this is a local-provider model that MUST have a provider block.
94
- const modelPrefix = model.split("/")[0];
95
- if (modelPrefix && !BUILTIN_CLOUD_PREFIXES.has(modelPrefix)) {
96
- // Local-prefix model not in providers map — this is a hard error, not a
97
- // fallback. Writing opencode.json without a provider block would cause
98
- // opencode to use cloud resolution, skewing results and incurring costs.
99
- throw new BenchConfigError(`model "${model}" uses local prefix "${modelPrefix}" but was not found in the providers config. ` +
100
- `Add it to the providers file or use a built-in cloud model prefix.`, true);
101
- }
102
- warnings.push(`model "${model}" not found in providers config; writing stub (expected for built-in cloud models)`);
103
- }
104
- else {
105
- throw err;
106
- }
107
- }
108
- }
109
- const config = {
110
- $schema: "https://opencode.ai/config.json",
111
- model,
112
- ...BENCH_OPENCODE_INVARIANTS,
113
- ...(providerBlock ? { provider: providerBlock } : {}),
114
- };
115
- fs.writeFileSync(path.join(opencodeConfigDir, "opencode.json"), JSON.stringify(config, null, 2), { mode: 0o600 });
116
- return { providerKey, warnings };
117
- }
118
- /**
119
- * Set up a complete bench run environment.
120
- *
121
- * 1. Creates isolation dirs (XDG_CACHE_HOME, XDG_CONFIG_HOME, OPENCODE_CONFIG).
122
- * 2. Writes opencode.json with BENCH_OPENCODE_INVARIANTS + optional provider.
123
- * 3. Writes $XDG_CONFIG_HOME/akm/config.json so the akm CLI and any plugin
124
- * find the correct stash via `akm config get stashDir`.
125
- * 4. Copies the pre-built FTS5 index into XDG_CACHE_HOME, or re-indexes as
126
- * fallback if no pre-built cache is available.
127
- *
128
- * Throws `BenchConfigError` for model prefix / provider mismatches.
129
- */
130
- export function setupBenchEnvironment(params) {
131
- const { model, arm, stashDir: rawStashDir, indexCacheHome, providers, dryRun = false, warnings = [] } = params;
132
- // Synthetic arm must never carry a stash.
133
- const stashDir = arm === "synthetic" ? undefined : rawStashDir;
134
- // Safety: refuse to run local-provider models without a providers config.
135
- const modelParts = model.split("/");
136
- if (modelParts.length >= 2 && !BUILTIN_CLOUD_PREFIXES.has(modelParts[0]) && !providers) {
137
- throw new BenchConfigError(`model "${model}" uses custom provider prefix "${modelParts[0]}" — supply opencodeProviders to avoid silent fallback to a cloud model`, false);
138
- }
139
- const dirs = createIsolationDirs(stashDir);
140
- const env = buildIsolatedEnv(dirs, model);
141
- // Synthetic arm must not carry AKM_STASH_DIR even if createIsolationDirs
142
- // somehow set it (recurrence guard for the #243 fixup pattern).
143
- if (arm === "synthetic") {
144
- delete env.AKM_STASH_DIR;
145
- }
146
- // Write opencode.json with invariants + optional provider block.
147
- const result = writeOpencodeJson(dirs.opencodeConfig, model, providers);
148
- for (const w of result.warnings)
149
- warnings.push(w);
150
- // Wire akm config and index only when a real stash is on disk.
151
- const stashOnDisk = stashDir ? fs.existsSync(stashDir) : false;
152
- if (stashDir && stashOnDisk && !dryRun) {
153
- // akm config: so `akm config get stashDir` returns the fixture path
154
- // and the akm-opencode plugin (if somehow re-enabled) injects the right
155
- // AKM_STASH_DIR into the bash-tool env via its shell.env hook.
156
- const akmConfigDir = path.join(dirs.configHome, "akm");
157
- fs.mkdirSync(akmConfigDir, { recursive: true });
158
- fs.writeFileSync(path.join(akmConfigDir, "config.json"), JSON.stringify({ stashDir }), { mode: 0o600 });
159
- // FTS5 index: fast-path copy from pre-built cache; slow-path re-index.
160
- const destAkmDir = path.join(dirs.cacheHome, "akm");
161
- fs.mkdirSync(destAkmDir, { recursive: true });
162
- if (indexCacheHome) {
163
- const srcAkmDir = path.join(indexCacheHome, "akm");
164
- try {
165
- for (const entry of fs.readdirSync(srcAkmDir)) {
166
- fs.copyFileSync(path.join(srcAkmDir, entry), path.join(destAkmDir, entry));
167
- }
168
- }
169
- catch (err) {
170
- warnings.push(`index copy failed, falling back to re-index: ${err.message}`);
171
- _runAkmIndex(stashDir, env);
172
- }
173
- }
174
- else {
175
- _runAkmIndex(stashDir, env);
176
- }
177
- }
178
- return {
179
- dirs,
180
- env,
181
- teardown() {
182
- try {
183
- fs.rmSync(dirs.root, { recursive: true, force: true });
184
- }
185
- catch {
186
- /* swallow */
187
- }
188
- },
189
- };
190
- }
191
- function _runAkmIndex(stashDir, env) {
192
- const cliEntry = path.resolve(__dirname, "..", "..", "src", "cli.ts");
193
- Bun.spawnSync({
194
- cmd: ["bun", "run", cliEntry, "index", "--full"],
195
- cwd: stashDir,
196
- env: { ...buildSanitizedEnvSource(), ...env },
197
- stdout: "pipe",
198
- stderr: "pipe",
199
- });
200
- }
201
- // ── validateFixtureCorpus ────────────────────────────────────────────────────
202
- const FIXTURES_ROOT = path.resolve(__dirname, "..", "fixtures", "stashes");
203
- /**
204
- * Validate that all task stash references name fixtures that exist on disk
205
- * (i.e. have a MANIFEST.json). Returns the set of missing fixture names.
206
- *
207
- * Call at bench startup before creating any work items. A non-empty `missing`
208
- * set means those tasks will produce `harness_error` at run time — better to
209
- * surface that now with named failures than to discover it per-seed.
210
- */
211
- export function validateFixtureCorpus(tasks) {
212
- const byFixture = new Map();
213
- for (const t of tasks) {
214
- if (!byFixture.has(t.stash))
215
- byFixture.set(t.stash, []);
216
- byFixture.get(t.stash)?.push(t.id);
217
- }
218
- const valid = new Set();
219
- const missing = new Map();
220
- for (const [fixture, taskIds] of byFixture) {
221
- const manifestPath = path.join(FIXTURES_ROOT, fixture, "MANIFEST.json");
222
- if (fs.existsSync(manifestPath)) {
223
- valid.add(fixture);
224
- }
225
- else {
226
- missing.set(fixture, taskIds);
227
- }
228
- }
229
- return { valid, missing };
230
- }
231
- // Re-export from driver for consumers that previously imported from there.
232
- export { buildIsolatedEnv, buildSanitizedEnvSource, createIsolationDirs } from "./driver";
233
- export { benchMkdtemp };
@@ -1,199 +0,0 @@
1
- /**
2
- * Tests for environment.ts — writeOpencodeJson, validateFixtureCorpus,
3
- * BENCH_OPENCODE_INVARIANTS, and setupBenchEnvironment (dryRun mode).
4
- */
5
- import { afterAll, beforeAll, describe, expect, test } from "bun:test";
6
- import fs from "node:fs";
7
- import path from "node:path";
8
- import { BENCH_OPENCODE_INVARIANTS, BUILTIN_CLOUD_PREFIXES, setupBenchEnvironment, validateFixtureCorpus, writeOpencodeJson, } from "./environment";
9
- import { benchMkdtemp } from "./tmp";
10
- // ── writeOpencodeJson ────────────────────────────────────────────────────────
11
describe("writeOpencodeJson", () => {
    // Scratch root shared by every case in this suite: created once up front,
    // removed once at the end.
    let tmp;
    beforeAll(() => {
        tmp = benchMkdtemp("bench-env-test-");
    });
    afterAll(() => {
        fs.rmSync(tmp, { recursive: true, force: true });
    });
    /** Create the per-case directory `<tmp>/<name>` and return its path. */
    function makeCaseDir(name) {
        const caseDir = path.join(tmp, name);
        fs.mkdirSync(caseDir, { recursive: true });
        return caseDir;
    }
    /** Read back and parse the `opencode.json` found in `caseDir`. */
    function readOpencodeJson(caseDir) {
        const raw = fs.readFileSync(path.join(caseDir, "opencode.json"), "utf8");
        return JSON.parse(raw);
    }
    /** Build a providers argument with a fake source path and the given provider map. */
    function fakeProviders(providerMap) {
        return { source: "/fake/providers.json", providers: providerMap };
    }
    test("always writes plugin:[] and permission block (isolation invariants)", () => {
        const caseDir = makeCaseDir("invariants");
        writeOpencodeJson(caseDir, "anthropic/claude-opus-4-7");
        const written = readOpencodeJson(caseDir);
        expect(written.plugin).toEqual([]);
        for (const tool of ["bash", "edit", "write"]) {
            expect(written.permission?.[tool]).toBe("allow");
        }
    });
    test("writes provider block when model resolves in providers map", () => {
        const caseDir = makeCaseDir("with-provider");
        const providers = fakeProviders({
            myprov: { npm: "@ai-sdk/openai-compatible", name: "My Provider" },
        });
        const outcome = writeOpencodeJson(caseDir, "myprov/my-model", providers);
        expect(outcome.providerKey).toBe("myprov");
        expect(outcome.warnings).toHaveLength(0);
        const written = readOpencodeJson(caseDir);
        expect(written.provider?.myprov).toBeDefined();
        expect(written.model).toBe("myprov/my-model");
    });
    test("writes stub (no provider block) and returns warning for built-in cloud model not in providers map", () => {
        const caseDir = makeCaseDir("cloud-stub");
        const providers = fakeProviders({ otherprov: {} });
        const outcome = writeOpencodeJson(caseDir, "opencode/big-pickle", providers);
        expect(outcome.providerKey).toBeUndefined();
        expect(outcome.warnings.length).toBeGreaterThan(0);
        const written = readOpencodeJson(caseDir);
        expect(written.provider).toBeUndefined();
        // The isolation invariants must survive the stub path too.
        expect(written.plugin).toEqual([]);
    });
    test("throws BenchConfigError for local-prefix model not found in providers map", () => {
        const caseDir = makeCaseDir("local-prefix-missing");
        const providers = fakeProviders({ otherprov: {} });
        // "shredder" is neither a built-in cloud prefix nor a key of the
        // providers map, so the call must throw instead of silently writing
        // a cloud-fallback stub.
        const attempt = () => writeOpencodeJson(caseDir, "shredder/qwen3.5-9b", providers);
        expect(attempt).toThrow(/local prefix/);
    });
    test("writes provider block for local-prefix model that IS found in providers map", () => {
        const caseDir = makeCaseDir("local-prefix-found");
        const providers = fakeProviders({
            shredder: { npm: "@ai-sdk/openai-compatible", name: "Shredder" },
        });
        const outcome = writeOpencodeJson(caseDir, "shredder/qwen3.5-9b", providers);
        expect(outcome.providerKey).toBe("shredder");
        expect(outcome.warnings).toHaveLength(0);
        const written = readOpencodeJson(caseDir);
        expect(written.provider?.shredder).toBeDefined();
        expect(written.model).toBe("shredder/qwen3.5-9b");
    });
    test("mode 0o600 (not world-readable)", () => {
        const caseDir = makeCaseDir("mode-check");
        writeOpencodeJson(caseDir, "anthropic/claude-opus-4-7");
        const { mode } = fs.statSync(path.join(caseDir, "opencode.json"));
        // Mask off the file-type bits; only the permission bits are compared.
        expect(mode & 0o777).toBe(0o600);
    });
});
92
- // ── BENCH_OPENCODE_INVARIANTS ────────────────────────────────────────────────
93
describe("BENCH_OPENCODE_INVARIANTS", () => {
    test("plugin is an empty readonly array", () => {
        const { plugin } = BENCH_OPENCODE_INVARIANTS;
        expect(plugin).toEqual([]);
        expect(Array.isArray(plugin)).toBe(true);
    });
    test("permission.bash is 'allow'", () => {
        const { permission } = BENCH_OPENCODE_INVARIANTS;
        expect(permission.bash).toBe("allow");
    });
});
102
- // ── BUILTIN_CLOUD_PREFIXES ───────────────────────────────────────────────────
103
describe("BUILTIN_CLOUD_PREFIXES", () => {
    test("includes anthropic, openai, opencode", () => {
        for (const prefix of ["anthropic", "openai", "opencode"]) {
            expect(BUILTIN_CLOUD_PREFIXES.has(prefix)).toBe(true);
        }
    });
    test("does not include custom provider prefixes like 'shredder' or 'don'", () => {
        for (const prefix of ["shredder", "don"]) {
            expect(BUILTIN_CLOUD_PREFIXES.has(prefix)).toBe(false);
        }
    });
});
114
- // ── validateFixtureCorpus ────────────────────────────────────────────────────
115
describe("validateFixtureCorpus", () => {
    /** Build one task per id, all referencing the same fixture. */
    const tasksFor = (stash, ids) => ids.map((id) => ({ id, stash }));
    test("returns known fixtures as valid", () => {
        const report = validateFixtureCorpus(tasksFor("az-cli", ["az-cli/foo"]));
        expect(report.valid.has("az-cli")).toBe(true);
        expect(report.missing.size).toBe(0);
    });
    test("returns nonexistent fixture as missing with its task IDs", () => {
        const ghostIds = ["ghost/task-1", "ghost/task-2"];
        const report = validateFixtureCorpus(tasksFor("ghost-fixture", ghostIds));
        expect(report.valid.has("ghost-fixture")).toBe(false);
        expect(report.missing.has("ghost-fixture")).toBe(true);
        expect(report.missing.get("ghost-fixture")).toEqual(["ghost/task-1", "ghost/task-2"]);
    });
    test("handles empty task list", () => {
        const report = validateFixtureCorpus([]);
        expect(report.valid.size).toBe(0);
        expect(report.missing.size).toBe(0);
    });
    test("deduplicates fixture names across tasks", () => {
        // Three tasks, one fixture: the fixture must be reported exactly once.
        const report = validateFixtureCorpus(tasksFor("az-cli", ["az-cli/a", "az-cli/b", "az-cli/c"]));
        expect(report.valid.size).toBe(1);
    });
});
147
- // ── setupBenchEnvironment (dryRun) ───────────────────────────────────────────
148
describe("setupBenchEnvironment dryRun", () => {
    /**
     * Set up a bench environment from `options`, hand it to `body`, and tear
     * it down afterwards even when an assertion inside `body` throws.
     */
    function withBenchEnv(options, body) {
        const benchEnv = setupBenchEnvironment(options);
        try {
            body(benchEnv);
        }
        finally {
            benchEnv.teardown();
        }
    }
    test("creates isolation dirs and writes opencode.json with invariants", () => {
        withBenchEnv({
            model: "anthropic/claude-opus-4-7",
            arm: "akm",
            dryRun: true,
        }, ({ dirs }) => {
            for (const dir of [dirs.cacheHome, dirs.configHome, dirs.opencodeConfig]) {
                expect(fs.existsSync(dir)).toBe(true);
            }
            const configPath = path.join(dirs.opencodeConfig, "opencode.json");
            const written = JSON.parse(fs.readFileSync(configPath, "utf8"));
            expect(written.plugin).toEqual([]);
            expect(written.permission?.bash).toBe("allow");
        });
    });
    test("throws for custom provider prefix without providers config", () => {
        const attempt = () => setupBenchEnvironment({
            model: "shredder/qwen/qwen3.5-9b",
            arm: "akm",
            dryRun: true,
        });
        expect(attempt).toThrow(/custom provider prefix/);
    });
    test("synthetic arm never sets AKM_STASH_DIR", () => {
        withBenchEnv({
            model: "anthropic/claude-opus-4-7",
            arm: "synthetic",
            stashDir: "/some/stash",
            dryRun: true,
        }, (benchEnv) => {
            expect(benchEnv.env.AKM_STASH_DIR).toBeUndefined();
        });
    });
    test("teardown removes the isolation dirs", () => {
        const benchEnv = setupBenchEnvironment({
            model: "anthropic/claude-opus-4-7",
            arm: "akm",
            dryRun: true,
        });
        const isolationRoot = benchEnv.dirs.root;
        expect(fs.existsSync(isolationRoot)).toBe(true);
        benchEnv.teardown();
        expect(fs.existsSync(isolationRoot)).toBe(false);
    });
});
@@ -1,179 +0,0 @@
1
- /**
2
- * Track B lesson quality + reuse metrics (issue #264, spec §6.3 follow-up).
3
- *
4
- * `computeLessonMetrics` walks the evolve runner's proposal log and the
5
- * Phase 3 pre/post arm `RunResult[]`s and emits one `LessonRecord` per
6
- * lesson-kind proposal. The record captures:
7
- *
8
- * - `source_failures` — eval/train tasks whose negative feedback events
9
- * targeted this asset ref (joined via the supplied `feedbackLog`).
10
- * - `lint_pass` / `accepted` — verbatim from the proposal log entry.
11
- * - `first_reused_on` / `reuse_count` / `reuse_pass_rate` — how often the
12
- * accepted lesson's ref appeared in post-arm runs' `assetsLoaded`, and
13
- * the pass-rate among those reuses.
14
- * - `negative_transfer_count` — count of distinct tasks with at least one
15
- * seed that PASSED in pre but FAILED (or exceeded budget) in post while the
16
- * post run loaded this lesson's ref. Spec §6.4 negative-transfer attribution.
17
- * - `leakage_risk` — `"high"` when any verbatim 4-token-or-longer phrase
18
- * in the supplied verifier source(s) appears verbatim in the lesson
19
- * body; `"medium"` for 3-token leakage; `"low"` otherwise. Mirrors the
20
- * Wave 3 `leakage.test.ts` philosophy: structural fragments are red
21
- * flags, lone tokens are not.
22
- *
23
- * The function is pure: no disk I/O, no subprocess. Callers (the evolve
24
- * runner) thread lesson bodies + verifier sources through optional maps so
25
- * the leakage check is fully deterministic and testable with mock inputs.
26
- */
27
/**
 * Build per-lesson quality/reuse records and their aggregate rates from the
 * evolve runner's outputs. Touches neither the disk nor any subprocess.
 *
 * Only `proposalLog` entries whose `kind` is `"lesson"` become
 * `LessonRecord`s. Revision-kind proposals are reported elsewhere (the §6.3
 * `proposals` block) and would distort the lesson reuse rate if included.
 *
 * @param input Object with:
 *   - `proposalLog` (required): entries read for `kind`, `assetRef`,
 *     `decision` and `lintPass`.
 *   - `feedbackLog` (optional): events read for `signal`, `goldRef`, `taskId`.
 *   - `preRuns` / `postRuns` (optional): runs read for `taskId`, `seed`,
 *     `outcome`, and (post only) `assetsLoaded`.
 *   - `lessonBodies` / `verifierSources` (optional): lookups keyed by asset
 *     ref, passed to `classifyLeakageRisk`.
 * @returns `{ lessons, lessons_created_count, lessons_accepted_count,
 *   proposal_lint_pass_rate, proposal_acceptance_rate, lesson_reuse_rate,
 *   lesson_reuse_success_rate, lesson_negative_transfer_count }`, with
 *   `lessons` sorted by ref. Every rate is 0 when its denominator is 0.
 */
export function computeLessonMetrics(input) {
    const lessonProposals = input.proposalLog.filter(({ kind }) => kind === "lesson");
    const feedbackEvents = input.feedbackLog ?? [];
    const preArmRuns = input.preRuns ?? [];
    const postArmRuns = input.postRuns ?? [];
    const bodiesByRef = input.lessonBodies ?? {};
    const verifierSourcesByRef = input.verifierSources ?? {};
    /** Divide, treating an empty denominator as a rate of 0. */
    const ratio = (numerator, denominator) => (denominator === 0 ? 0 : numerator / denominator);
    // taskId → seed → pre-arm outcome, so each post run's regression check is
    // a pair of map lookups.
    const preOutcomeByTaskSeed = new Map();
    for (const { taskId, seed, outcome } of preArmRuns) {
        if (!preOutcomeByTaskSeed.has(taskId)) {
            preOutcomeByTaskSeed.set(taskId, new Map());
        }
        preOutcomeByTaskSeed.get(taskId).set(seed, outcome);
    }
    // ref → set of task ids that produced negative feedback against that ref.
    const failedTasksByRef = new Map();
    for (const event of feedbackEvents) {
        if (event.signal === "negative") {
            const taskIds = failedTasksByRef.get(event.goldRef) ?? new Set();
            taskIds.add(event.taskId);
            failedTasksByRef.set(event.goldRef, taskIds);
        }
    }
    const records = lessonProposals.map((proposal) => {
        const ref = proposal.assetRef;
        const isAccepted = proposal.decision === "accept";
        // Reuse is only attributed to accepted lessons: the post-arm runs
        // whose `assetsLoaded` includes this ref.
        const reuses = isAccepted
            ? postArmRuns.filter((run) => run.assetsLoaded?.includes(ref))
            : [];
        const firstNamedReuse = reuses.find((run) => run.taskId !== null);
        const passedReuses = reuses.filter((run) => run.outcome === "pass");
        // Negative transfer: the post run failed (or exceeded budget) where
        // the pre run with the same (task, seed) passed. Collected in a Set
        // keyed by taskId so a task regressing on several seeds counts once.
        const regressedTasks = new Set(reuses
            .filter((run) => run.outcome === "fail" || run.outcome === "budget_exceeded")
            .filter((run) => preOutcomeByTaskSeed.get(run.taskId)?.get(run.seed) === "pass")
            .map((run) => run.taskId));
        return {
            ref,
            source_failures: [...(failedTasksByRef.get(ref) ?? [])].sort(),
            lint_pass: proposal.lintPass,
            accepted: isAccepted,
            first_reused_on: firstNamedReuse ? firstNamedReuse.taskId : null,
            reuse_count: reuses.length,
            reuse_pass_rate: ratio(passedReuses.length, reuses.length),
            negative_transfer_count: regressedTasks.size,
            leakage_risk: classifyLeakageRisk(bodiesByRef[ref], verifierSourcesByRef[ref]),
        };
    });
    records.sort((left, right) => left.ref.localeCompare(right.ref));
    // Single pass over the sorted records for every aggregate counter.
    let acceptedCount = 0;
    let lintPassedCount = 0;
    let reusedAcceptedCount = 0;
    let reusePassRateSum = 0;
    let negativeTransferTotal = 0;
    for (const record of records) {
        if (record.lint_pass) {
            lintPassedCount += 1;
        }
        negativeTransferTotal += record.negative_transfer_count;
        if (record.accepted) {
            acceptedCount += 1;
            if (record.reuse_count > 0) {
                reusedAcceptedCount += 1;
                reusePassRateSum += record.reuse_pass_rate;
            }
        }
    }
    return {
        lessons: records,
        lessons_created_count: records.length,
        lessons_accepted_count: acceptedCount,
        proposal_lint_pass_rate: ratio(lintPassedCount, records.length),
        proposal_acceptance_rate: ratio(acceptedCount, records.length),
        lesson_reuse_rate: ratio(reusedAcceptedCount, acceptedCount),
        lesson_reuse_success_rate: ratio(reusePassRateSum, reusedAcceptedCount),
        lesson_negative_transfer_count: negativeTransferTotal,
    };
}
126
/**
 * Rate how much of the verifier source text shows up word-for-word in a
 * lesson body.
 *
 *  - `"high"`   — some run of 4 consecutive words from any verifier source
 *                 occurs in the body.
 *  - `"medium"` — no 4-word run matches, but some 3-word run does.
 *  - `"low"`    — neither matches, or there is nothing to compare (missing or
 *                 empty body, missing or empty source list, body without any
 *                 tokens).
 *
 * Deliberately simple: both sides are reduced to lowercase word tokens and an
 * N-word window is slid over each verifier source, asking whether the body
 * contains that exact run of words. Because comparison happens on tokens,
 * differences in whitespace or punctuation between words do not hide a match.
 *
 * @param body Lesson body text; may be missing.
 * @param verifierSources Array of verifier source strings; may be missing.
 * @returns `"high"`, `"medium"`, or `"low"`.
 */
export function classifyLeakageRisk(body, verifierSources) {
    const nothingToCompare = !body || !verifierSources || verifierSources.length === 0;
    if (nothingToCompare) {
        return "low";
    }
    const bodyWords = tokenize(body);
    if (bodyWords.length === 0) {
        return "low";
    }
    // Pad with spaces so an n-gram can only match on whole-word boundaries.
    const haystack = ` ${bodyWords.join(" ")} `;
    let sawThreeWordMatch = false;
    for (const source of verifierSources) {
        const sourceWords = tokenize(source);
        if (sourceWords.length >= 3) {
            // A 4-word match is the worst outcome, so stop at the first one.
            if (containsNGram(haystack, sourceWords, 4)) {
                return "high";
            }
            sawThreeWordMatch = sawThreeWordMatch || containsNGram(haystack, sourceWords, 3);
        }
    }
    return sawThreeWordMatch ? "medium" : "low";
}
157
/**
 * Report whether any run of `n` consecutive entries of `tokens`, joined by
 * single spaces, occurs inside `bodyJoined`.
 *
 * `bodyJoined` is expected to be space-separated words with a leading and a
 * trailing space; each candidate phrase is wrapped in spaces as well, so a
 * match always starts and ends on a word boundary. When `tokens` holds fewer
 * than `n` entries there is no window to test and the result is false.
 *
 * @param bodyJoined Space-padded, space-joined body words.
 * @param tokens Words to take n-gram windows from.
 * @param n Window size in words.
 * @returns True as soon as one window is found in `bodyJoined`, else false.
 */
function containsNGram(bodyJoined, tokens, n) {
    let start = 0;
    while (start + n <= tokens.length) {
        const gram = tokens.slice(start, start + n);
        if (bodyJoined.includes(` ${gram.join(" ")} `)) {
            return true;
        }
        start += 1;
    }
    return false;
}
173
/**
 * Split `text` into lowercase word tokens.
 *
 * A token is a maximal run of letters, combining marks, digits, or
 * underscores from any script; every other character acts as a separator.
 * Empty fragments (from leading or trailing separators) are dropped.
 *
 * The separator class is Unicode-aware on purpose. An ASCII-only class treats
 * every accented or non-Latin letter as a separator, which cuts "café" down
 * to "caf" and produces no tokens at all for non-Latin text, so n-gram
 * comparison on such text would be meaningless. Pure-ASCII input tokenizes
 * exactly as it would with `[^a-z0-9_]`.
 *
 * @param text String to tokenize.
 * @returns Array of lowercase tokens in order of appearance.
 */
function tokenize(text) {
    return text
        .toLowerCase()
        .split(/[^\p{L}\p{M}\p{N}_]+/u)
        .filter((token) => token.length > 0);
}