akm-cli 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (333) hide show
  1. package/CHANGELOG.md +66 -0
  2. package/dist/{cli.js → src/cli.js} +712 -34
  3. package/dist/{commands → src/commands}/config-cli.js +47 -4
  4. package/dist/src/commands/distill.js +283 -0
  5. package/dist/src/commands/events.js +108 -0
  6. package/dist/src/commands/history.js +191 -0
  7. package/dist/{commands → src/commands}/installed-stashes.js +1 -1
  8. package/dist/src/commands/proposal.js +119 -0
  9. package/dist/src/commands/propose.js +171 -0
  10. package/dist/src/commands/reflect.js +193 -0
  11. package/dist/{commands → src/commands}/registry-search.js +71 -7
  12. package/dist/{commands → src/commands}/remember.js +12 -0
  13. package/dist/{commands → src/commands}/search.js +104 -4
  14. package/dist/{commands → src/commands}/self-update.js +4 -3
  15. package/dist/{commands → src/commands}/show.js +73 -0
  16. package/dist/{commands → src/commands}/source-add.js +5 -1
  17. package/dist/{commands → src/commands}/source-manage.js +7 -1
  18. package/dist/{core → src/core}/asset-ref.js +5 -5
  19. package/dist/{core → src/core}/asset-spec.js +12 -0
  20. package/dist/{core → src/core}/common.js +1 -1
  21. package/dist/{core → src/core}/config.js +203 -121
  22. package/dist/{core → src/core}/errors.js +4 -0
  23. package/dist/src/core/events.js +239 -0
  24. package/dist/src/core/lesson-lint.js +86 -0
  25. package/dist/src/core/proposals.js +406 -0
  26. package/dist/src/core/warn.js +72 -0
  27. package/dist/{core → src/core}/write-source.js +80 -5
  28. package/dist/{indexer → src/indexer}/db-search.js +114 -24
  29. package/dist/{indexer → src/indexer}/db.js +76 -23
  30. package/dist/{indexer → src/indexer}/file-context.js +0 -3
  31. package/dist/src/indexer/graph-boost.js +179 -0
  32. package/dist/src/indexer/graph-extraction.js +212 -0
  33. package/dist/{indexer → src/indexer}/indexer.js +88 -7
  34. package/dist/{indexer → src/indexer}/matchers.js +1 -1
  35. package/dist/src/indexer/memory-inference.js +263 -0
  36. package/dist/{indexer → src/indexer}/metadata.js +111 -3
  37. package/dist/{indexer → src/indexer}/search-source.js +4 -2
  38. package/dist/src/integrations/agent/config.js +292 -0
  39. package/dist/src/integrations/agent/detect.js +94 -0
  40. package/dist/src/integrations/agent/index.js +17 -0
  41. package/dist/src/integrations/agent/profiles.js +65 -0
  42. package/dist/src/integrations/agent/prompts.js +167 -0
  43. package/dist/src/integrations/agent/spawn.js +272 -0
  44. package/dist/{integrations → src/integrations}/github.js +9 -3
  45. package/dist/{integrations → src/integrations}/lockfile.js +0 -26
  46. package/dist/{llm → src/llm}/client.js +33 -2
  47. package/dist/{llm → src/llm}/embedders/remote.js +37 -3
  48. package/dist/src/llm/feature-gate.js +108 -0
  49. package/dist/src/llm/graph-extract.js +107 -0
  50. package/dist/src/llm/index-passes.js +35 -0
  51. package/dist/src/llm/memory-infer.js +86 -0
  52. package/dist/{output → src/output}/cli-hints.js +15 -2
  53. package/dist/{output → src/output}/renderers.js +63 -2
  54. package/dist/src/output/shapes.js +523 -0
  55. package/dist/src/output/text.js +1116 -0
  56. package/dist/{registry → src/registry}/build-index.js +19 -8
  57. package/dist/{registry → src/registry}/factory.js +0 -8
  58. package/dist/{registry → src/registry}/providers/static-index.js +6 -3
  59. package/dist/{registry → src/registry}/resolve.js +68 -2
  60. package/dist/{setup → src/setup}/setup.js +52 -5
  61. package/dist/{sources → src/sources}/providers/git.js +7 -15
  62. package/dist/{wiki → src/wiki}/wiki.js +54 -6
  63. package/dist/{workflows → src/workflows}/runs.js +37 -3
  64. package/dist/tests/add-website-source.test.js +119 -0
  65. package/dist/tests/agent/agent-config-loader.test.js +70 -0
  66. package/dist/tests/agent/agent-config.test.js +221 -0
  67. package/dist/tests/agent/agent-detect.test.js +100 -0
  68. package/dist/tests/agent/agent-spawn.test.js +234 -0
  69. package/dist/tests/agent-output.test.js +186 -0
  70. package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +103 -0
  71. package/dist/tests/architecture/agent-spawn-seam.test.js +193 -0
  72. package/dist/tests/architecture/llm-stateless-seam.test.js +112 -0
  73. package/dist/tests/asset-ref.test.js +192 -0
  74. package/dist/tests/asset-registry.test.js +103 -0
  75. package/dist/tests/asset-spec.test.js +241 -0
  76. package/dist/tests/bench/attribution.test.js +996 -0
  77. package/dist/tests/bench/cleanup-sigint.test.js +83 -0
  78. package/dist/tests/bench/cleanup.js +234 -0
  79. package/dist/tests/bench/cleanup.test.js +166 -0
  80. package/dist/tests/bench/cli.js +1018 -0
  81. package/dist/tests/bench/cli.test.js +445 -0
  82. package/dist/tests/bench/compare.test.js +556 -0
  83. package/dist/tests/bench/corpus.js +317 -0
  84. package/dist/tests/bench/corpus.test.js +258 -0
  85. package/dist/tests/bench/doctor.js +525 -0
  86. package/dist/tests/bench/driver.js +401 -0
  87. package/dist/tests/bench/driver.test.js +584 -0
  88. package/dist/tests/bench/environment.js +233 -0
  89. package/dist/tests/bench/environment.test.js +199 -0
  90. package/dist/tests/bench/evolve-metrics.js +179 -0
  91. package/dist/tests/bench/evolve-metrics.test.js +187 -0
  92. package/dist/tests/bench/evolve.js +647 -0
  93. package/dist/tests/bench/evolve.test.js +624 -0
  94. package/dist/tests/bench/failure-modes.test.js +349 -0
  95. package/dist/tests/bench/feedback-integrity.test.js +457 -0
  96. package/dist/tests/bench/leakage.test.js +228 -0
  97. package/dist/tests/bench/learning-curve.test.js +134 -0
  98. package/dist/tests/bench/metrics.js +2395 -0
  99. package/dist/tests/bench/metrics.test.js +1150 -0
  100. package/dist/tests/bench/no-os-tmpdir-invariant.test.js +43 -0
  101. package/dist/tests/bench/opencode-config.js +194 -0
  102. package/dist/tests/bench/opencode-config.test.js +370 -0
  103. package/dist/tests/bench/report.js +1885 -0
  104. package/dist/tests/bench/report.test.js +1038 -0
  105. package/dist/tests/bench/run-config.js +355 -0
  106. package/dist/tests/bench/run-config.test.js +298 -0
  107. package/dist/tests/bench/run-curate-test.js +32 -0
  108. package/dist/tests/bench/run-failing-tasks.js +56 -0
  109. package/dist/tests/bench/run-full-bench.js +51 -0
  110. package/dist/tests/bench/run-items36-targeted.js +69 -0
  111. package/dist/tests/bench/run-nano-quick.js +42 -0
  112. package/dist/tests/bench/run-waveg-targeted.js +62 -0
  113. package/dist/tests/bench/runner.js +699 -0
  114. package/dist/tests/bench/runner.test.js +958 -0
  115. package/dist/tests/bench/search-bridge.test.js +331 -0
  116. package/dist/tests/bench/tmp.js +131 -0
  117. package/dist/tests/bench/trajectory.js +116 -0
  118. package/dist/tests/bench/trajectory.test.js +127 -0
  119. package/dist/tests/bench/verifier.js +114 -0
  120. package/dist/tests/bench/verifier.test.js +118 -0
  121. package/dist/tests/bench/workflow-evaluator.js +557 -0
  122. package/dist/tests/bench/workflow-evaluator.test.js +421 -0
  123. package/dist/tests/bench/workflow-spec.js +345 -0
  124. package/dist/tests/bench/workflow-spec.test.js +363 -0
  125. package/dist/tests/bench/workflow-trace.js +472 -0
  126. package/dist/tests/bench/workflow-trace.test.js +254 -0
  127. package/dist/tests/benchmark-search-quality.js +536 -0
  128. package/dist/tests/benchmark-suite.js +1441 -0
  129. package/dist/tests/capture-cli.test.js +112 -0
  130. package/dist/tests/cli-errors.test.js +204 -0
  131. package/dist/tests/commands/events.test.js +370 -0
  132. package/dist/tests/commands/history.test.js +418 -0
  133. package/dist/tests/commands/import.test.js +103 -0
  134. package/dist/tests/commands/proposal-cli.test.js +209 -0
  135. package/dist/tests/commands/reflect-propose-cli.test.js +333 -0
  136. package/dist/tests/commands/remember.test.js +97 -0
  137. package/dist/tests/commands/scope-flags.test.js +300 -0
  138. package/dist/tests/commands/search.test.js +537 -0
  139. package/dist/tests/commands/show-indexer-parity.test.js +117 -0
  140. package/dist/tests/commands/show.test.js +294 -0
  141. package/dist/tests/common.test.js +266 -0
  142. package/dist/tests/completions.test.js +142 -0
  143. package/dist/tests/config-cli.test.js +193 -0
  144. package/dist/tests/config-llm-features.test.js +139 -0
  145. package/dist/tests/config.test.js +569 -0
  146. package/dist/tests/contracts/migration-baseline.test.js +43 -0
  147. package/dist/tests/contracts/reflect-propose-envelope.test.js +139 -0
  148. package/dist/tests/contracts/spec-helpers.js +46 -0
  149. package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +228 -0
  150. package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +56 -0
  151. package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +34 -0
  152. package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +94 -0
  153. package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +39 -0
  154. package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +44 -0
  155. package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +47 -0
  156. package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +40 -0
  157. package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +58 -0
  158. package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +34 -0
  159. package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +75 -0
  160. package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +36 -0
  161. package/dist/tests/core/write-source.test.js +366 -0
  162. package/dist/tests/curate-command.test.js +87 -0
  163. package/dist/tests/db-scoring.test.js +201 -0
  164. package/dist/tests/db.test.js +654 -0
  165. package/dist/tests/distill-cli-flag.test.js +208 -0
  166. package/dist/tests/distill.test.js +515 -0
  167. package/dist/tests/docker-install.test.js +120 -0
  168. package/dist/tests/e2e.test.js +1419 -0
  169. package/dist/tests/embedder.test.js +340 -0
  170. package/dist/tests/embedding-model-config.test.js +379 -0
  171. package/dist/tests/feedback-command.test.js +172 -0
  172. package/dist/tests/file-context.test.js +552 -0
  173. package/dist/tests/fixtures/scripts/git/summarize-diff.js +9 -0
  174. package/dist/tests/fixtures/scripts/lint/eslint-check.js +7 -0
  175. package/dist/tests/fixtures/stashes/load.js +166 -0
  176. package/dist/tests/fixtures/stashes/load.test.js +97 -0
  177. package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +12 -0
  178. package/dist/tests/frontmatter.test.js +190 -0
  179. package/dist/tests/fts-field-weighting.test.js +254 -0
  180. package/dist/tests/fuzzy-search.test.js +230 -0
  181. package/dist/tests/git-provider-clone.test.js +45 -0
  182. package/dist/tests/github.test.js +161 -0
  183. package/dist/tests/graph-boost-ranking.test.js +305 -0
  184. package/dist/tests/graph-extraction.test.js +282 -0
  185. package/dist/tests/helpers/usage-events.js +8 -0
  186. package/dist/tests/index-pass-llm.test.js +161 -0
  187. package/dist/tests/indexer.test.js +570 -0
  188. package/dist/tests/info-command.test.js +166 -0
  189. package/dist/tests/init.test.js +69 -0
  190. package/dist/tests/install-script.test.js +246 -0
  191. package/dist/tests/integration/agent-real-profile.test.js +94 -0
  192. package/dist/tests/issue-36-repro.test.js +304 -0
  193. package/dist/tests/issues-191-194.test.js +160 -0
  194. package/dist/tests/lesson-lint.test.js +111 -0
  195. package/dist/tests/llm-client.test.js +115 -0
  196. package/dist/tests/llm-feature-gate.test.js +151 -0
  197. package/dist/tests/llm.test.js +139 -0
  198. package/dist/tests/lockfile.test.js +216 -0
  199. package/dist/tests/manifest.test.js +205 -0
  200. package/dist/tests/markdown.test.js +126 -0
  201. package/dist/tests/matchers-unit.test.js +189 -0
  202. package/dist/tests/memory-inference.test.js +299 -0
  203. package/dist/tests/merge-scoring.test.js +136 -0
  204. package/dist/tests/metadata.test.js +313 -0
  205. package/dist/tests/migration-help.test.js +89 -0
  206. package/dist/tests/origin-resolve.test.js +124 -0
  207. package/dist/tests/output-baseline.test.js +218 -0
  208. package/dist/tests/output-shapes-unit.test.js +478 -0
  209. package/dist/tests/parallel-search.test.js +272 -0
  210. package/dist/tests/parameter-metadata.test.js +365 -0
  211. package/dist/tests/paths.test.js +177 -0
  212. package/dist/tests/progressive-disclosure.test.js +280 -0
  213. package/dist/tests/proposals.test.js +279 -0
  214. package/dist/tests/proposed-quality.test.js +271 -0
  215. package/dist/tests/provider-registry.test.js +32 -0
  216. package/dist/tests/ranking-regression.test.js +548 -0
  217. package/dist/tests/reflect-propose.test.js +455 -0
  218. package/dist/tests/registry-build-index.test.js +394 -0
  219. package/dist/tests/registry-cli.test.js +290 -0
  220. package/dist/tests/registry-index-v2.test.js +430 -0
  221. package/dist/tests/registry-install.test.js +728 -0
  222. package/dist/tests/registry-providers/parity.test.js +189 -0
  223. package/dist/tests/registry-providers/skills-sh.test.js +309 -0
  224. package/dist/tests/registry-providers/static-index.test.js +238 -0
  225. package/dist/tests/registry-resolve.test.js +126 -0
  226. package/dist/tests/registry-search.test.js +923 -0
  227. package/dist/tests/remember-frontmatter.test.js +378 -0
  228. package/dist/tests/remember-unit.test.js +123 -0
  229. package/dist/tests/ripgrep-install.test.js +251 -0
  230. package/dist/tests/ripgrep-resolve.test.js +108 -0
  231. package/dist/tests/ripgrep.test.js +163 -0
  232. package/dist/tests/save-command.test.js +94 -0
  233. package/dist/tests/save-trust-qa-fixes.test.js +270 -0
  234. package/dist/tests/scoring-pipeline.test.js +648 -0
  235. package/dist/tests/search-include-proposed-cli.test.js +118 -0
  236. package/dist/tests/self-update.test.js +442 -0
  237. package/dist/tests/semantic-search-e2e.test.js +512 -0
  238. package/dist/tests/semantic-status.test.js +471 -0
  239. package/dist/tests/setup-run.integration.js +877 -0
  240. package/dist/tests/setup-wizard.test.js +198 -0
  241. package/dist/tests/setup.test.js +131 -0
  242. package/dist/tests/source-add.test.js +11 -0
  243. package/dist/tests/source-clone.test.js +254 -0
  244. package/dist/tests/source-manage.test.js +366 -0
  245. package/dist/tests/source-providers/filesystem.test.js +82 -0
  246. package/dist/tests/source-providers/git.test.js +252 -0
  247. package/dist/tests/source-providers/website.test.js +128 -0
  248. package/dist/tests/source-qa-fixes.test.js +286 -0
  249. package/dist/tests/source-registry.test.js +350 -0
  250. package/dist/tests/source-resolve.test.js +100 -0
  251. package/dist/tests/source-source.test.js +281 -0
  252. package/dist/tests/source.test.js +533 -0
  253. package/dist/tests/tar-utils-scan.test.js +73 -0
  254. package/dist/tests/toggle-components.test.js +73 -0
  255. package/dist/tests/usage-telemetry.test.js +265 -0
  256. package/dist/tests/utility-scoring.test.js +558 -0
  257. package/dist/tests/vault-load-error.test.js +78 -0
  258. package/dist/tests/vault-qa-fixes.test.js +194 -0
  259. package/dist/tests/vault.test.js +429 -0
  260. package/dist/tests/vector-search.test.js +608 -0
  261. package/dist/tests/walker.test.js +252 -0
  262. package/dist/tests/wave2-cluster-bc.test.js +228 -0
  263. package/dist/tests/wave2-cluster-d.test.js +180 -0
  264. package/dist/tests/wave2-cluster-e.test.js +179 -0
  265. package/dist/tests/wiki-qa-fixes.test.js +270 -0
  266. package/dist/tests/wiki.test.js +529 -0
  267. package/dist/tests/workflow-cli.test.js +271 -0
  268. package/dist/tests/workflow-markdown.test.js +171 -0
  269. package/dist/tests/workflow-path-escape.test.js +132 -0
  270. package/dist/tests/workflow-qa-fixes.test.js +395 -0
  271. package/dist/tests/workflows/indexer-rejection.test.js +213 -0
  272. package/docs/README.md +8 -0
  273. package/docs/migration/release-notes/0.7.0.md +244 -0
  274. package/package.json +2 -2
  275. package/dist/core/warn.js +0 -27
  276. package/dist/output/shapes.js +0 -212
  277. package/dist/output/text.js +0 -520
  278. /package/dist/{commands → src/commands}/completions.js +0 -0
  279. /package/dist/{commands → src/commands}/curate.js +0 -0
  280. /package/dist/{commands → src/commands}/info.js +0 -0
  281. /package/dist/{commands → src/commands}/init.js +0 -0
  282. /package/dist/{commands → src/commands}/install-audit.js +0 -0
  283. /package/dist/{commands → src/commands}/migration-help.js +0 -0
  284. /package/dist/{commands → src/commands}/source-clone.js +0 -0
  285. /package/dist/{commands → src/commands}/vault.js +0 -0
  286. /package/dist/{core → src/core}/asset-registry.js +0 -0
  287. /package/dist/{core → src/core}/frontmatter.js +0 -0
  288. /package/dist/{core → src/core}/markdown.js +0 -0
  289. /package/dist/{core → src/core}/paths.js +0 -0
  290. /package/dist/{indexer → src/indexer}/manifest.js +0 -0
  291. /package/dist/{indexer → src/indexer}/search-fields.js +0 -0
  292. /package/dist/{indexer → src/indexer}/semantic-status.js +0 -0
  293. /package/dist/{indexer → src/indexer}/usage-events.js +0 -0
  294. /package/dist/{indexer → src/indexer}/walker.js +0 -0
  295. /package/dist/{llm → src/llm}/embedder.js +0 -0
  296. /package/dist/{llm → src/llm}/embedders/cache.js +0 -0
  297. /package/dist/{llm → src/llm}/embedders/local.js +0 -0
  298. /package/dist/{llm → src/llm}/embedders/types.js +0 -0
  299. /package/dist/{llm → src/llm}/metadata-enhance.js +0 -0
  300. /package/dist/{output → src/output}/context.js +0 -0
  301. /package/dist/{registry → src/registry}/create-provider-registry.js +0 -0
  302. /package/dist/{registry → src/registry}/origin-resolve.js +0 -0
  303. /package/dist/{registry → src/registry}/providers/index.js +0 -0
  304. /package/dist/{registry → src/registry}/providers/skills-sh.js +0 -0
  305. /package/dist/{registry → src/registry}/providers/types.js +0 -0
  306. /package/dist/{registry → src/registry}/types.js +0 -0
  307. /package/dist/{setup → src/setup}/detect.js +0 -0
  308. /package/dist/{setup → src/setup}/ripgrep-install.js +0 -0
  309. /package/dist/{setup → src/setup}/ripgrep-resolve.js +0 -0
  310. /package/dist/{setup → src/setup}/steps.js +0 -0
  311. /package/dist/{sources → src/sources}/include.js +0 -0
  312. /package/dist/{sources → src/sources}/provider-factory.js +0 -0
  313. /package/dist/{sources → src/sources}/provider.js +0 -0
  314. /package/dist/{sources → src/sources}/providers/filesystem.js +0 -0
  315. /package/dist/{sources → src/sources}/providers/index.js +0 -0
  316. /package/dist/{sources → src/sources}/providers/install-types.js +0 -0
  317. /package/dist/{sources → src/sources}/providers/npm.js +0 -0
  318. /package/dist/{sources → src/sources}/providers/provider-utils.js +0 -0
  319. /package/dist/{sources → src/sources}/providers/sync-from-ref.js +0 -0
  320. /package/dist/{sources → src/sources}/providers/tar-utils.js +0 -0
  321. /package/dist/{sources → src/sources}/providers/website.js +0 -0
  322. /package/dist/{sources → src/sources}/resolve.js +0 -0
  323. /package/dist/{sources → src/sources}/types.js +0 -0
  324. /package/dist/{templates → src/templates}/wiki-templates.js +0 -0
  325. /package/dist/{version.js → src/version.js} +0 -0
  326. /package/dist/{workflows → src/workflows}/authoring.js +0 -0
  327. /package/dist/{workflows → src/workflows}/cli.js +0 -0
  328. /package/dist/{workflows → src/workflows}/db.js +0 -0
  329. /package/dist/{workflows → src/workflows}/document-cache.js +0 -0
  330. /package/dist/{workflows → src/workflows}/parser.js +0 -0
  331. /package/dist/{workflows → src/workflows}/renderer.js +0 -0
  332. /package/dist/{workflows → src/workflows}/schema.js +0 -0
  333. /package/dist/{workflows → src/workflows}/validator.js +0 -0
@@ -0,0 +1,233 @@
1
+ /**
2
+ * environment.ts — unified bench environment setup.
3
+ *
4
+ * `setupBenchEnvironment` is the single function that owns all per-run
5
+ * isolation: isolation dirs, opencode.json, akm config, FTS5 index. Both
6
+ * `runOne` (driver.ts) and the doctor's live-run check call this function,
7
+ * guaranteeing they produce identical environments.
8
+ *
9
+ * Key design decisions:
10
+ * - `BENCH_OPENCODE_INVARIANTS` (plugin:[], permission block) are always
11
+ * written — they are bench isolation invariants, not conditional on the
12
+ * provider path. No silent stub fallbacks.
13
+ * - `dryRun: true` skips the akm config and index writes. Unit tests set
14
+ * this so the setup path is exercised without spawning a real agent.
15
+ * - `validateFixtureCorpus` is called at bench startup to catch missing
16
+ * fixtures before any work items start, not per-task mid-run.
17
+ */
18
+ import fs from "node:fs";
19
+ import path from "node:path";
20
+ import { buildIsolatedEnv, buildSanitizedEnvSource, createIsolationDirs } from "./driver";
21
+ import { BenchConfigError, selectProviderForModel } from "./opencode-config";
22
+ import { benchMkdtemp } from "./tmp";
23
+ // ── Bench isolation invariants ───────────────────────────────────────────────
24
/**
 * Top-level keys written unconditionally into every bench-generated
 * opencode.json. These are isolation invariants — never conditional on
 * provider resolution or model type.
 *
 * - `plugin: []` — prevents operator plugins (akm-opencode, etc.) from
 *   running lifecycle hooks that override AKM_STASH_DIR, warm indexes
 *   against the wrong stash, or prompt akm setup wizards.
 * - `permission` — opencode in non-interactive (`opencode run`) mode
 *   silently skips tool calls without explicit permission grants.
 *
 * The object and its nested values are frozen so that no consumer can alter
 * the invariants for later runs in the same process; an attempted write
 * throws a TypeError in module (strict) code. Spread it into a new object
 * to build a config.
 */
export const BENCH_OPENCODE_INVARIANTS = Object.freeze({
  plugin: Object.freeze([]),
  permission: Object.freeze({
    bash: "allow",
    edit: "allow",
    write: "allow",
    read: "allow",
    webfetch: "allow",
  }),
});
45
+ // ── Built-in cloud prefixes ──────────────────────────────────────────────────
46
/**
 * Provider prefixes (the text before the first "/" in a model id) that
 * opencode resolves through its built-in cloud-provider registry.
 *
 * A model carrying one of these prefixes needs no custom provider entry in
 * the bench providers JSON. Any other prefix requires `opencodeProviders`;
 * the harness refuses to run without it so a run cannot silently fall back
 * to a cloud model and incur unexpected API charges.
 */
export const BUILTIN_CLOUD_PREFIXES = new Set([
  "anthropic", "openai", "openrouter", "opencode",
  "google", "amazon", "azure", "vertex", "bedrock",
  "mistral", "groq", "together", "fireworks",
]);
68
/**
 * Serialize the bench `opencode.json` for one run into `opencodeConfigDir`.
 *
 * The file always carries `$schema`, `model`, and every key of
 * `BENCH_OPENCODE_INVARIANTS`. A `provider` block is appended only when
 * `providers` is supplied and `selectProviderForModel` resolves the model.
 *
 * When provider selection fails with a `BenchConfigError`:
 * - if the text before the first "/" of `model` is non-empty and not in
 *   `BUILTIN_CLOUD_PREFIXES`, a new `BenchConfigError` is thrown and no file
 *   is written — a stub without a provider block would let opencode fall
 *   back to cloud resolution, skewing results and incurring costs;
 * - otherwise a warning is recorded and the file is written without a
 *   provider block (expected for built-in cloud models).
 *
 * Any other error raised during selection, and any filesystem error from the
 * write, propagates unchanged.
 *
 * @param opencodeConfigDir Directory that receives `opencode.json`.
 * @param model Model identifier; the segment before the first "/" is treated
 *   as the provider prefix.
 * @param providers Optional providers config handed to `selectProviderForModel`.
 * @returns `{ providerKey, warnings }` — `providerKey` is undefined when no
 *   provider block was written.
 */
export function writeOpencodeJson(opencodeConfigDir, model, providers) {
  const warnings = [];
  let providerKey;
  let providerBlock;

  if (providers) {
    try {
      const { providerKey: resolvedKey, entry } = selectProviderForModel(providers, model);
      providerKey = resolvedKey;
      providerBlock = { [resolvedKey]: entry };
    } catch (err) {
      // Only config-level lookup failures are handled here.
      if (!(err instanceof BenchConfigError)) {
        throw err;
      }
      const [modelPrefix] = model.split("/");
      const needsProviderEntry = Boolean(modelPrefix) && !BUILTIN_CLOUD_PREFIXES.has(modelPrefix);
      if (needsProviderEntry) {
        // Hard error, not a fallback: see the doc comment above.
        throw new BenchConfigError(
          `model "${model}" uses local prefix "${modelPrefix}" but was not found in the providers config. ` +
            `Add it to the providers file or use a built-in cloud model prefix.`,
          true,
        );
      }
      warnings.push(`model "${model}" not found in providers config; writing stub (expected for built-in cloud models)`);
    }
  }

  const config = {
    $schema: "https://opencode.ai/config.json",
    model,
    ...BENCH_OPENCODE_INVARIANTS,
  };
  if (providerBlock) {
    // Appended last so the key order matches: $schema, model, invariants, provider.
    config.provider = providerBlock;
  }

  const targetFile = path.join(opencodeConfigDir, "opencode.json");
  const serialized = JSON.stringify(config, null, 2);
  fs.writeFileSync(targetFile, serialized, { mode: 0o600 });

  return { providerKey, warnings };
}
118
/**
 * Set up a complete bench run environment.
 *
 * 1. Creates isolation dirs via `createIsolationDirs`.
 * 2. Writes opencode.json with BENCH_OPENCODE_INVARIANTS + optional provider.
 * 3. Writes `<configHome>/akm/config.json` so the akm CLI and any plugin
 *    find the correct stash via `akm config get stashDir`.
 * 4. Copies the pre-built FTS5 index from `<indexCacheHome>/akm` into
 *    `<cacheHome>/akm`, or re-indexes when no pre-built cache is given or
 *    the copy fails.
 *
 * Steps 3 and 4 run only when a stash directory is set, exists on disk, and
 * `dryRun` is false. The synthetic arm never carries a stash.
 *
 * If any step after the isolation dirs are created throws, the dirs are
 * removed before the error is rethrown — the caller never receives
 * `teardown` in that case, so nothing else could clean them up.
 *
 * A failed re-index does not throw; it is reported through `warnings`.
 *
 * @param params Options object with:
 *   - `model`: model id; the segment before the first "/" is the provider prefix.
 *   - `arm`: bench arm; `"synthetic"` drops the stash and `AKM_STASH_DIR`.
 *   - `stashDir`: stash directory (ignored for the synthetic arm).
 *   - `indexCacheHome`: optional directory holding a pre-built `akm` index dir.
 *   - `providers`: optional providers config passed to `writeOpencodeJson`.
 *   - `dryRun`: when true, skip the akm config and index writes (default false).
 *   - `warnings`: optional array that warning strings are appended to.
 * @returns `{ dirs, env, teardown }`; `teardown()` removes `dirs.root` and
 *   never throws.
 * @throws BenchConfigError for model prefix / provider mismatches.
 */
export function setupBenchEnvironment(params) {
  const { model, arm, stashDir: rawStashDir, indexCacheHome, providers, dryRun = false, warnings = [] } = params;
  // Synthetic arm must never carry a stash.
  const stashDir = arm === "synthetic" ? undefined : rawStashDir;
  // Safety: refuse to run local-provider models without a providers config.
  const modelParts = model.split("/");
  if (modelParts.length >= 2 && !BUILTIN_CLOUD_PREFIXES.has(modelParts[0]) && !providers) {
    throw new BenchConfigError(`model "${model}" uses custom provider prefix "${modelParts[0]}" — supply opencodeProviders to avoid silent fallback to a cloud model`, false);
  }
  const dirs = createIsolationDirs(stashDir);
  // Best-effort removal shared by teardown() and the failure path below.
  const removeIsolationDirs = () => {
    try {
      fs.rmSync(dirs.root, { recursive: true, force: true });
    } catch {
      /* swallow: cleanup must never mask the caller's own result or error */
    }
  };
  try {
    const env = buildIsolatedEnv(dirs, model);
    // Synthetic arm must not carry AKM_STASH_DIR even if createIsolationDirs
    // somehow set it (recurrence guard for the #243 fixup pattern).
    if (arm === "synthetic") {
      delete env.AKM_STASH_DIR;
    }
    // Write opencode.json with invariants + optional provider block.
    const result = writeOpencodeJson(dirs.opencodeConfig, model, providers);
    for (const w of result.warnings) {
      warnings.push(w);
    }
    // Wire akm config and index only when a real stash is on disk.
    const stashOnDisk = stashDir ? fs.existsSync(stashDir) : false;
    if (stashDir && stashOnDisk && !dryRun) {
      // akm config: so `akm config get stashDir` returns the fixture path
      // and the akm-opencode plugin (if somehow re-enabled) injects the right
      // AKM_STASH_DIR into the bash-tool env via its shell.env hook.
      const akmConfigDir = path.join(dirs.configHome, "akm");
      fs.mkdirSync(akmConfigDir, { recursive: true });
      fs.writeFileSync(path.join(akmConfigDir, "config.json"), JSON.stringify({ stashDir }), { mode: 0o600 });
      // Re-index and surface a non-zero exit instead of dropping it silently.
      const reindex = () => {
        const outcome = _runAkmIndex(stashDir, env);
        if (outcome.exitCode !== 0) {
          const detail = outcome.stderr ? outcome.stderr.toString().trim() : "";
          warnings.push(`akm index exited with code ${outcome.exitCode}${detail ? `: ${detail}` : ""}`);
        }
      };
      // FTS5 index: fast-path copy from pre-built cache; slow-path re-index.
      const destAkmDir = path.join(dirs.cacheHome, "akm");
      fs.mkdirSync(destAkmDir, { recursive: true });
      if (indexCacheHome) {
        const srcAkmDir = path.join(indexCacheHome, "akm");
        try {
          for (const entry of fs.readdirSync(srcAkmDir)) {
            fs.copyFileSync(path.join(srcAkmDir, entry), path.join(destAkmDir, entry));
          }
        } catch (err) {
          // A thrown value is not guaranteed to be an Error instance.
          const reason = err instanceof Error ? err.message : String(err);
          warnings.push(`index copy failed, falling back to re-index: ${reason}`);
          reindex();
        }
      } else {
        reindex();
      }
    }
    return {
      dirs,
      env,
      teardown() {
        removeIsolationDirs();
      },
    };
  } catch (err) {
    // The caller never gets teardown() on failure, so clean up here.
    removeIsolationDirs();
    throw err;
  }
}

/**
 * Run `bun run <src/cli.ts> index --full` synchronously with `stashDir` as
 * the working directory, using the sanitized environment overlaid with `env`.
 *
 * @param stashDir Working directory for the index command.
 * @param env Isolated environment variables; these win over the sanitized base.
 * @returns The `Bun.spawnSync` result, so callers can inspect `exitCode` and
 *   `stderr` (both streams are piped, not inherited).
 */
function _runAkmIndex(stashDir, env) {
  const cliEntry = path.resolve(__dirname, "..", "..", "src", "cli.ts");
  return Bun.spawnSync({
    cmd: ["bun", "run", cliEntry, "index", "--full"],
    cwd: stashDir,
    env: { ...buildSanitizedEnvSource(), ...env },
    stdout: "pipe",
    stderr: "pipe",
  });
}
201
+ // ── validateFixtureCorpus ────────────────────────────────────────────────────
202
const FIXTURES_ROOT = path.resolve(__dirname, "..", "fixtures", "stashes");

/**
 * Sort the fixtures referenced by `tasks` into those present on disk and
 * those that are not. A fixture counts as present when
 * `<FIXTURES_ROOT>/<fixture>/MANIFEST.json` exists.
 *
 * Call at bench startup before creating any work items. A non-empty
 * `missing` map means those tasks will produce `harness_error` at run time —
 * better to surface that now with named failures than to discover it
 * per-seed.
 *
 * @param tasks Iterable of task objects; each is read for `stash` (the
 *   fixture name) and `id`.
 * @returns `{ valid, missing }` — `valid` is a Set of fixture names that
 *   exist; `missing` is a Map from each absent fixture name to the ids of
 *   the tasks that reference it.
 */
export function validateFixtureCorpus(tasks) {
  // Group task ids under the fixture they reference, in first-seen order.
  const taskIdsByFixture = new Map();
  for (const task of tasks) {
    const known = taskIdsByFixture.get(task.stash);
    if (known === undefined) {
      taskIdsByFixture.set(task.stash, [task.id]);
    } else {
      known.push(task.id);
    }
  }

  const valid = new Set();
  const missing = new Map();
  taskIdsByFixture.forEach((taskIds, fixture) => {
    const hasManifest = fs.existsSync(path.join(FIXTURES_ROOT, fixture, "MANIFEST.json"));
    if (hasManifest) {
      valid.add(fixture);
    } else {
      missing.set(fixture, taskIds);
    }
  });

  return { valid, missing };
}
231
+ // Re-export from driver for consumers that previously imported from there.
232
+ export { buildIsolatedEnv, buildSanitizedEnvSource, createIsolationDirs } from "./driver";
233
+ export { benchMkdtemp };
@@ -0,0 +1,199 @@
1
+ /**
2
+ * Tests for environment.ts — writeOpencodeJson, validateFixtureCorpus,
3
+ * BENCH_OPENCODE_INVARIANTS, and setupBenchEnvironment (dryRun mode).
4
+ */
5
+ import { afterAll, beforeAll, describe, expect, test } from "bun:test";
6
+ import fs from "node:fs";
7
+ import path from "node:path";
8
+ import { BENCH_OPENCODE_INVARIANTS, BUILTIN_CLOUD_PREFIXES, setupBenchEnvironment, validateFixtureCorpus, writeOpencodeJson, } from "./environment";
9
+ import { benchMkdtemp } from "./tmp";
10
+ // ── writeOpencodeJson ────────────────────────────────────────────────────────
11
describe("writeOpencodeJson", () => {
    // Scratch root shared by every case in this suite: created once, removed once.
    let scratchRoot;
    /** Create and return a per-case directory beneath the scratch root. */
    const makeCaseDir = (name) => {
        const caseDir = path.join(scratchRoot, name);
        fs.mkdirSync(caseDir, { recursive: true });
        return caseDir;
    };
    /** Read back and parse the opencode.json written into `caseDir`. */
    const readWrittenConfig = (caseDir) => {
        const raw = fs.readFileSync(path.join(caseDir, "opencode.json"), "utf8");
        return JSON.parse(raw);
    };
    /** Wrap a provider map in the shape the function under test receives. */
    const fakeProviders = (providers) => ({
        source: "/fake/providers.json",
        providers,
    });
    beforeAll(() => {
        scratchRoot = benchMkdtemp("bench-env-test-");
    });
    afterAll(() => {
        fs.rmSync(scratchRoot, { recursive: true, force: true });
    });
    test("always writes plugin:[] and permission block (isolation invariants)", () => {
        const caseDir = makeCaseDir("invariants");
        writeOpencodeJson(caseDir, "anthropic/claude-opus-4-7");
        const written = readWrittenConfig(caseDir);
        expect(written.plugin).toEqual([]);
        expect(written.permission?.bash).toBe("allow");
        expect(written.permission?.edit).toBe("allow");
        expect(written.permission?.write).toBe("allow");
    });
    test("writes provider block when model resolves in providers map", () => {
        const caseDir = makeCaseDir("with-provider");
        const providers = fakeProviders({
            myprov: { npm: "@ai-sdk/openai-compatible", name: "My Provider" },
        });
        const outcome = writeOpencodeJson(caseDir, "myprov/my-model", providers);
        expect(outcome.providerKey).toBe("myprov");
        expect(outcome.warnings).toHaveLength(0);
        const written = readWrittenConfig(caseDir);
        expect(written.provider?.myprov).toBeDefined();
        expect(written.model).toBe("myprov/my-model");
    });
    test("writes stub (no provider block) and returns warning for built-in cloud model not in providers map", () => {
        const caseDir = makeCaseDir("cloud-stub");
        const providers = fakeProviders({ otherprov: {} });
        const outcome = writeOpencodeJson(caseDir, "opencode/big-pickle", providers);
        expect(outcome.providerKey).toBeUndefined();
        expect(outcome.warnings.length).toBeGreaterThan(0);
        const written = readWrittenConfig(caseDir);
        expect(written.provider).toBeUndefined();
        // The isolation invariants must survive the stub path too.
        expect(written.plugin).toEqual([]);
    });
    test("throws BenchConfigError for local-prefix model not found in providers map", () => {
        const caseDir = makeCaseDir("local-prefix-missing");
        const providers = fakeProviders({ otherprov: {} });
        // "shredder" is neither a built-in cloud prefix nor a key of the providers map.
        const attempt = () => writeOpencodeJson(caseDir, "shredder/qwen3.5-9b", providers);
        // Only the throw is asserted here; the on-disk state is not inspected.
        expect(attempt).toThrow(/local prefix/);
    });
    test("writes provider block for local-prefix model that IS found in providers map", () => {
        const caseDir = makeCaseDir("local-prefix-found");
        const providers = fakeProviders({
            shredder: { npm: "@ai-sdk/openai-compatible", name: "Shredder" },
        });
        const outcome = writeOpencodeJson(caseDir, "shredder/qwen3.5-9b", providers);
        expect(outcome.providerKey).toBe("shredder");
        expect(outcome.warnings).toHaveLength(0);
        const written = readWrittenConfig(caseDir);
        expect(written.provider?.shredder).toBeDefined();
        expect(written.model).toBe("shredder/qwen3.5-9b");
    });
    test("mode 0o600 (not world-readable)", () => {
        const caseDir = makeCaseDir("mode-check");
        writeOpencodeJson(caseDir, "anthropic/claude-opus-4-7");
        const { mode } = fs.statSync(path.join(caseDir, "opencode.json"));
        // Mask off the file-type bits so only the permission bits are compared.
        expect(mode & 0o777).toBe(0o600);
    });
});
92
+ // ── BENCH_OPENCODE_INVARIANTS ────────────────────────────────────────────────
93
describe("BENCH_OPENCODE_INVARIANTS", () => {
    test("plugin is an empty readonly array", () => {
        const { plugin } = BENCH_OPENCODE_INVARIANTS;
        expect(plugin).toEqual([]);
        expect(Array.isArray(plugin)).toBe(true);
    });
    test("permission.bash is 'allow'", () => {
        const { permission } = BENCH_OPENCODE_INVARIANTS;
        expect(permission.bash).toBe("allow");
    });
});
102
+ // ── BUILTIN_CLOUD_PREFIXES ───────────────────────────────────────────────────
103
describe("BUILTIN_CLOUD_PREFIXES", () => {
    test("includes anthropic, openai, opencode", () => {
        for (const prefix of ["anthropic", "openai", "opencode"]) {
            expect(BUILTIN_CLOUD_PREFIXES.has(prefix)).toBe(true);
        }
    });
    test("does not include custom provider prefixes like 'shredder' or 'don'", () => {
        for (const prefix of ["shredder", "don"]) {
            expect(BUILTIN_CLOUD_PREFIXES.has(prefix)).toBe(false);
        }
    });
});
114
+ // ── validateFixtureCorpus ────────────────────────────────────────────────────
115
describe("validateFixtureCorpus", () => {
    test("returns known fixtures as valid", () => {
        const report = validateFixtureCorpus([{ id: "az-cli/foo", stash: "az-cli" }]);
        expect(report.valid.has("az-cli")).toBe(true);
        expect(report.missing.size).toBe(0);
    });
    test("returns nonexistent fixture as missing with its task IDs", () => {
        const ghostTasks = [
            { id: "ghost/task-1", stash: "ghost-fixture" },
            { id: "ghost/task-2", stash: "ghost-fixture" },
        ];
        const report = validateFixtureCorpus(ghostTasks);
        expect(report.valid.has("ghost-fixture")).toBe(false);
        expect(report.missing.has("ghost-fixture")).toBe(true);
        expect(report.missing.get("ghost-fixture")).toEqual(["ghost/task-1", "ghost/task-2"]);
    });
    test("handles empty task list", () => {
        const report = validateFixtureCorpus([]);
        expect(report.valid.size).toBe(0);
        expect(report.missing.size).toBe(0);
    });
    test("deduplicates fixture names across tasks", () => {
        // Three tasks share one fixture, so exactly one fixture name should come back.
        const sameFixtureTasks = [
            { id: "az-cli/a", stash: "az-cli" },
            { id: "az-cli/b", stash: "az-cli" },
            { id: "az-cli/c", stash: "az-cli" },
        ];
        const report = validateFixtureCorpus(sameFixtureTasks);
        expect(report.valid.size).toBe(1);
    });
});
147
+ // ── setupBenchEnvironment (dryRun) ───────────────────────────────────────────
148
describe("setupBenchEnvironment dryRun", () => {
    /**
     * Build a dry-run environment from `options`, hand it to `check`, and
     * tear it down afterwards even when an expectation fails.
     */
    const withDryRunEnv = (options, check) => {
        const bench = setupBenchEnvironment({ ...options, dryRun: true });
        try {
            check(bench);
        }
        finally {
            bench.teardown();
        }
    };
    test("creates isolation dirs and writes opencode.json with invariants", () => {
        withDryRunEnv({ model: "anthropic/claude-opus-4-7", arm: "akm" }, (bench) => {
            const { cacheHome, configHome, opencodeConfig } = bench.dirs;
            expect(fs.existsSync(cacheHome)).toBe(true);
            expect(fs.existsSync(configHome)).toBe(true);
            expect(fs.existsSync(opencodeConfig)).toBe(true);
            const raw = fs.readFileSync(path.join(opencodeConfig, "opencode.json"), "utf8");
            const written = JSON.parse(raw);
            expect(written.plugin).toEqual([]);
            expect(written.permission?.bash).toBe("allow");
        });
    });
    test("throws for custom provider prefix without providers config", () => {
        const attempt = () => setupBenchEnvironment({
            model: "shredder/qwen/qwen3.5-9b",
            arm: "akm",
            dryRun: true,
        });
        expect(attempt).toThrow(/custom provider prefix/);
    });
    test("synthetic arm never sets AKM_STASH_DIR", () => {
        const options = {
            model: "anthropic/claude-opus-4-7",
            arm: "synthetic",
            stashDir: "/some/stash",
        };
        withDryRunEnv(options, (bench) => {
            expect(bench.env.AKM_STASH_DIR).toBeUndefined();
        });
    });
    test("teardown removes the isolation dirs", () => {
        const bench = setupBenchEnvironment({
            model: "anthropic/claude-opus-4-7",
            arm: "akm",
            dryRun: true,
        });
        const isolationRoot = bench.dirs.root;
        expect(fs.existsSync(isolationRoot)).toBe(true);
        bench.teardown();
        expect(fs.existsSync(isolationRoot)).toBe(false);
    });
});
@@ -0,0 +1,179 @@
1
+ /**
2
+ * Track B lesson quality + reuse metrics (issue #264, spec §6.3 follow-up).
3
+ *
4
+ * `computeLessonMetrics` walks the evolve runner's proposal log and the
5
+ * Phase 3 pre/post arm `RunResult[]`s and emits one `LessonRecord` per
6
+ * lesson-kind proposal. The record captures:
7
+ *
8
+ * - `source_failures` — eval/train tasks whose negative feedback events
9
+ * targeted this asset ref (joined via the supplied `feedbackLog`).
10
+ * - `lint_pass` / `accepted` — verbatim from the proposal log entry.
11
+ * - `first_reused_on` / `reuse_count` / `reuse_pass_rate` — how often the
12
+ * accepted lesson's ref appeared in post-arm runs' `assetsLoaded`, and
13
+ * the pass-rate among those reuses.
14
+ * - `negative_transfer_count` — count of distinct tasks (deduplicated by
15
+ * taskId) where some (taskId, seed) run PASSED in pre but FAILED in post AND
16
+ * the post run loaded this lesson's ref. Spec §6.4 negative-transfer attribution.
17
+ * - `leakage_risk` — `"high"` when any verbatim 4-token-or-longer phrase
18
+ * in the supplied verifier source(s) appears verbatim in the lesson
19
+ * body; `"medium"` for 3-token leakage; `"low"` otherwise. Mirrors the
20
+ * Wave 3 `leakage.test.ts` philosophy: structural fragments are red
21
+ * flags, lone tokens are not.
22
+ *
23
+ * The function is pure: no disk I/O, no subprocess. Callers (the evolve
24
+ * runner) thread lesson bodies + verifier sources through optional maps so
25
+ * the leakage check is fully deterministic and testable with mock inputs.
26
+ */
27
/**
 * Build one record per lesson-kind proposal plus run-level aggregates.
 * Performs no disk or subprocess access; everything is derived from `input`.
 *
 * Proposals whose `kind` is not `"lesson"` are ignored. Reuse and
 * negative-transfer figures are only gathered for proposals whose
 * `decision` is `"accept"`; other lessons report zero reuse.
 *
 * @param input Object with a required `proposalLog` array and optional
 *   `feedbackLog`, `preRuns`, `postRuns`, `lessonBodies` and
 *   `verifierSources` (each defaults to empty when null/undefined).
 * @returns Object holding `lessons` (records sorted by `ref`) and the
 *   aggregate counts and rates. Every rate is 0 when its denominator is 0.
 */
export function computeLessonMetrics(input) {
    const lessonProposals = input.proposalLog.filter((proposal) => proposal.kind === "lesson");
    const postRuns = input.postRuns ?? [];
    const lessonBodies = input.lessonBodies ?? {};
    const verifierSources = input.verifierSources ?? {};
    const preOutcomes = indexPreArmOutcomes(input.preRuns ?? []);
    const failedTasksByRef = indexNegativeFeedbackTasks(input.feedbackLog ?? []);
    const records = [];
    for (const proposal of lessonProposals) {
        const ref = proposal.assetRef;
        const isAccepted = proposal.decision === "accept";
        // Rejected lessons never count as reused, so the post-arm scan is skipped.
        const reuse = isAccepted
            ? summarizeLessonReuse(ref, postRuns, preOutcomes)
            : { firstReusedOn: null, reuseCount: 0, reusePassCount: 0, regressedTasks: new Set() };
        records.push({
            ref,
            source_failures: Array.from(failedTasksByRef.get(ref) ?? []).sort(),
            lint_pass: proposal.lintPass,
            accepted: isAccepted,
            first_reused_on: reuse.firstReusedOn,
            reuse_count: reuse.reuseCount,
            reuse_pass_rate: reuse.reuseCount === 0 ? 0 : reuse.reusePassCount / reuse.reuseCount,
            negative_transfer_count: reuse.regressedTasks.size,
            leakage_risk: classifyLeakageRisk(lessonBodies[ref], verifierSources[ref]),
        });
    }
    records.sort((left, right) => left.ref.localeCompare(right.ref));
    // Single pass over the sorted records to accumulate every aggregate.
    let lintPassedCount = 0;
    let acceptedCount = 0;
    let reusedAcceptedCount = 0;
    let reusePassRateSum = 0;
    let negativeTransferTotal = 0;
    for (const record of records) {
        if (record.lint_pass) {
            lintPassedCount += 1;
        }
        negativeTransferTotal += record.negative_transfer_count;
        if (!record.accepted) {
            continue;
        }
        acceptedCount += 1;
        if (record.reuse_count > 0) {
            reusedAcceptedCount += 1;
            reusePassRateSum += record.reuse_pass_rate;
        }
    }
    const createdCount = records.length;
    return {
        lessons: records,
        lessons_created_count: createdCount,
        lessons_accepted_count: acceptedCount,
        proposal_lint_pass_rate: createdCount === 0 ? 0 : lintPassedCount / createdCount,
        proposal_acceptance_rate: createdCount === 0 ? 0 : acceptedCount / createdCount,
        lesson_reuse_rate: acceptedCount === 0 ? 0 : reusedAcceptedCount / acceptedCount,
        lesson_reuse_success_rate: reusedAcceptedCount === 0 ? 0 : reusePassRateSum / reusedAcceptedCount,
        lesson_negative_transfer_count: negativeTransferTotal,
    };
}
/** Index pre-arm runs as taskId → (seed → outcome); a later duplicate overwrites an earlier one. */
function indexPreArmOutcomes(preRuns) {
    const outcomesByTask = new Map();
    for (const run of preRuns) {
        if (!outcomesByTask.has(run.taskId)) {
            outcomesByTask.set(run.taskId, new Map());
        }
        outcomesByTask.get(run.taskId).set(run.seed, run.outcome);
    }
    return outcomesByTask;
}
/** Index feedback events with signal "negative" as goldRef → Set of taskIds. */
function indexNegativeFeedbackTasks(feedbackLog) {
    const tasksByRef = new Map();
    for (const event of feedbackLog) {
        if (event.signal !== "negative") {
            continue;
        }
        if (!tasksByRef.has(event.goldRef)) {
            tasksByRef.set(event.goldRef, new Set());
        }
        tasksByRef.get(event.goldRef).add(event.taskId);
    }
    return tasksByRef;
}
/**
 * Scan post-arm runs that loaded `ref`: count them, count the passing ones,
 * remember the first task seen, and collect tasks that passed in the pre arm
 * for the same (taskId, seed) but ended as "fail" or "budget_exceeded" here.
 */
function summarizeLessonReuse(ref, postRuns, preOutcomes) {
    const summary = {
        firstReusedOn: null,
        reuseCount: 0,
        reusePassCount: 0,
        // A Set keyed by taskId, so a task regressing on several seeds counts once.
        regressedTasks: new Set(),
    };
    for (const run of postRuns) {
        const loadedLesson = run.assetsLoaded?.includes(ref);
        if (!loadedLesson) {
            continue;
        }
        if (summary.firstReusedOn === null) {
            summary.firstReusedOn = run.taskId;
        }
        summary.reuseCount += 1;
        switch (run.outcome) {
            case "pass":
                summary.reusePassCount += 1;
                break;
            case "fail":
            case "budget_exceeded":
                if (preOutcomes.get(run.taskId)?.get(run.seed) === "pass") {
                    summary.regressedTasks.add(run.taskId);
                }
                break;
            default:
                break;
        }
    }
    return summary;
}
126
/**
 * Classify how much of the verifier source text leaks into a lesson body.
 *
 * Both texts are tokenised (lowercased, split on non-word characters) and
 * each verifier source is scanned with a sliding word window:
 *
 * - `"high"`   — some run of 4 consecutive source tokens appears in the body;
 * - `"medium"` — no 4-token run matches, but some 3-token run does;
 * - `"low"`    — otherwise, including when the body or the sources are
 *   missing or empty.
 *
 * @param body Lesson body text; a falsy value yields `"low"`.
 * @param verifierSources Array of verifier source strings. A single string
 *   is also accepted and treated as one source. Entries that are not
 *   strings are skipped.
 * @returns `"high"`, `"medium"` or `"low"`.
 */
export function classifyLeakageRisk(body, verifierSources) {
    if (!body || !verifierSources)
        return "low";
    // A bare string would otherwise be iterated one character at a time, so
    // every "source" would have fewer than 3 tokens and real leakage would be
    // reported as "low". Treat it as a single source instead.
    const sources = typeof verifierSources === "string" ? [verifierSources] : verifierSources;
    if (sources.length === 0)
        return "low";
    const bodyTokens = tokenize(body);
    if (bodyTokens.length === 0)
        return "low";
    // Pad with spaces so a phrase can only match on whole-token boundaries.
    const bodyJoined = ` ${bodyTokens.join(" ")} `;
    let mediumHit = false;
    for (const source of sources) {
        // Skip holes or non-text entries rather than crashing inside tokenize.
        if (typeof source !== "string")
            continue;
        const sourceTokens = tokenize(source);
        if (sourceTokens.length < 3)
            continue;
        // A 4-token hit is the highest level, so stop scanning immediately.
        if (containsNGram(bodyJoined, sourceTokens, 4))
            return "high";
        if (!mediumHit && containsNGram(bodyJoined, sourceTokens, 3))
            mediumHit = true;
    }
    return mediumHit ? "medium" : "low";
}
157
/**
 * Report whether any run of `n` consecutive entries of `tokens`, joined by
 * single spaces and wrapped in a leading and trailing space, occurs inside
 * `bodyJoined`. The wrapping spaces mean a hit needs `bodyJoined` to contain
 * the phrase delimited by spaces on both sides.
 *
 * @param bodyJoined Text to search.
 * @param tokens Token list to take windows from.
 * @param n Window size in tokens.
 * @returns `true` on the first matching window; `false` when none match or
 *   when `tokens` holds fewer than `n` entries.
 */
function containsNGram(bodyJoined, tokens, n) {
    // Number of distinct window start positions; zero or negative when tokens is too short.
    const windowCount = tokens.length - n + 1;
    let start = 0;
    while (start < windowCount) {
        const windowWords = tokens.slice(start, start + n);
        if (bodyJoined.includes(` ${windowWords.join(" ")} `)) {
            return true;
        }
        start += 1;
    }
    return false;
}
173
/**
 * Lowercase `text` and return its maximal runs of `a-z`, `0-9` and `_`, in
 * order. Every other character acts as a separator, so the result never
 * contains an empty string; text with no such run yields an empty array.
 */
function tokenize(text) {
    const wordRuns = text.toLowerCase().match(/[a-z0-9_]+/g);
    return wordRuns ?? [];
}