akm-cli 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (327) hide show
  1. package/package.json +8 -8
  2. package/dist/tests/add-website-source.test.js +0 -119
  3. package/dist/tests/agent/agent-config-loader.test.js +0 -70
  4. package/dist/tests/agent/agent-config.test.js +0 -221
  5. package/dist/tests/agent/agent-detect.test.js +0 -100
  6. package/dist/tests/agent/agent-spawn.test.js +0 -234
  7. package/dist/tests/agent-output.test.js +0 -186
  8. package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +0 -103
  9. package/dist/tests/architecture/agent-spawn-seam.test.js +0 -193
  10. package/dist/tests/architecture/llm-stateless-seam.test.js +0 -112
  11. package/dist/tests/asset-ref.test.js +0 -192
  12. package/dist/tests/asset-registry.test.js +0 -103
  13. package/dist/tests/asset-spec.test.js +0 -241
  14. package/dist/tests/bench/attribution.test.js +0 -996
  15. package/dist/tests/bench/cleanup-sigint.test.js +0 -83
  16. package/dist/tests/bench/cleanup.js +0 -234
  17. package/dist/tests/bench/cleanup.test.js +0 -166
  18. package/dist/tests/bench/cli.js +0 -1018
  19. package/dist/tests/bench/cli.test.js +0 -445
  20. package/dist/tests/bench/compare.test.js +0 -556
  21. package/dist/tests/bench/corpus.js +0 -317
  22. package/dist/tests/bench/corpus.test.js +0 -258
  23. package/dist/tests/bench/doctor.js +0 -525
  24. package/dist/tests/bench/driver.js +0 -401
  25. package/dist/tests/bench/driver.test.js +0 -584
  26. package/dist/tests/bench/environment.js +0 -233
  27. package/dist/tests/bench/environment.test.js +0 -199
  28. package/dist/tests/bench/evolve-metrics.js +0 -179
  29. package/dist/tests/bench/evolve-metrics.test.js +0 -187
  30. package/dist/tests/bench/evolve.js +0 -647
  31. package/dist/tests/bench/evolve.test.js +0 -624
  32. package/dist/tests/bench/failure-modes.test.js +0 -349
  33. package/dist/tests/bench/feedback-integrity.test.js +0 -457
  34. package/dist/tests/bench/leakage.test.js +0 -228
  35. package/dist/tests/bench/learning-curve.test.js +0 -134
  36. package/dist/tests/bench/metrics.js +0 -2395
  37. package/dist/tests/bench/metrics.test.js +0 -1150
  38. package/dist/tests/bench/no-os-tmpdir-invariant.test.js +0 -43
  39. package/dist/tests/bench/opencode-config.js +0 -194
  40. package/dist/tests/bench/opencode-config.test.js +0 -370
  41. package/dist/tests/bench/report.js +0 -1885
  42. package/dist/tests/bench/report.test.js +0 -1038
  43. package/dist/tests/bench/run-config.js +0 -355
  44. package/dist/tests/bench/run-config.test.js +0 -298
  45. package/dist/tests/bench/run-curate-test.js +0 -32
  46. package/dist/tests/bench/run-failing-tasks.js +0 -56
  47. package/dist/tests/bench/run-full-bench.js +0 -51
  48. package/dist/tests/bench/run-items36-targeted.js +0 -69
  49. package/dist/tests/bench/run-nano-quick.js +0 -42
  50. package/dist/tests/bench/run-waveg-targeted.js +0 -62
  51. package/dist/tests/bench/runner.js +0 -699
  52. package/dist/tests/bench/runner.test.js +0 -958
  53. package/dist/tests/bench/search-bridge.test.js +0 -331
  54. package/dist/tests/bench/tmp.js +0 -131
  55. package/dist/tests/bench/trajectory.js +0 -116
  56. package/dist/tests/bench/trajectory.test.js +0 -127
  57. package/dist/tests/bench/verifier.js +0 -114
  58. package/dist/tests/bench/verifier.test.js +0 -118
  59. package/dist/tests/bench/workflow-evaluator.js +0 -557
  60. package/dist/tests/bench/workflow-evaluator.test.js +0 -421
  61. package/dist/tests/bench/workflow-spec.js +0 -345
  62. package/dist/tests/bench/workflow-spec.test.js +0 -363
  63. package/dist/tests/bench/workflow-trace.js +0 -472
  64. package/dist/tests/bench/workflow-trace.test.js +0 -254
  65. package/dist/tests/benchmark-search-quality.js +0 -536
  66. package/dist/tests/benchmark-suite.js +0 -1441
  67. package/dist/tests/capture-cli.test.js +0 -112
  68. package/dist/tests/cli-errors.test.js +0 -204
  69. package/dist/tests/commands/events.test.js +0 -370
  70. package/dist/tests/commands/history.test.js +0 -418
  71. package/dist/tests/commands/import.test.js +0 -103
  72. package/dist/tests/commands/proposal-cli.test.js +0 -209
  73. package/dist/tests/commands/reflect-propose-cli.test.js +0 -333
  74. package/dist/tests/commands/remember.test.js +0 -97
  75. package/dist/tests/commands/scope-flags.test.js +0 -300
  76. package/dist/tests/commands/search.test.js +0 -537
  77. package/dist/tests/commands/show-indexer-parity.test.js +0 -117
  78. package/dist/tests/commands/show.test.js +0 -294
  79. package/dist/tests/common.test.js +0 -266
  80. package/dist/tests/completions.test.js +0 -142
  81. package/dist/tests/config-cli.test.js +0 -193
  82. package/dist/tests/config-llm-features.test.js +0 -139
  83. package/dist/tests/config.test.js +0 -569
  84. package/dist/tests/contracts/migration-baseline.test.js +0 -43
  85. package/dist/tests/contracts/reflect-propose-envelope.test.js +0 -139
  86. package/dist/tests/contracts/spec-helpers.js +0 -46
  87. package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +0 -228
  88. package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +0 -56
  89. package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +0 -34
  90. package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +0 -94
  91. package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +0 -39
  92. package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +0 -44
  93. package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +0 -47
  94. package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +0 -40
  95. package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +0 -58
  96. package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +0 -34
  97. package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +0 -75
  98. package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +0 -36
  99. package/dist/tests/core/write-source.test.js +0 -366
  100. package/dist/tests/curate-command.test.js +0 -87
  101. package/dist/tests/db-scoring.test.js +0 -201
  102. package/dist/tests/db.test.js +0 -654
  103. package/dist/tests/distill-cli-flag.test.js +0 -208
  104. package/dist/tests/distill.test.js +0 -515
  105. package/dist/tests/docker-install.test.js +0 -120
  106. package/dist/tests/e2e.test.js +0 -1419
  107. package/dist/tests/embedder.test.js +0 -340
  108. package/dist/tests/embedding-model-config.test.js +0 -379
  109. package/dist/tests/feedback-command.test.js +0 -172
  110. package/dist/tests/file-context.test.js +0 -552
  111. package/dist/tests/fixtures/scripts/git/summarize-diff.js +0 -9
  112. package/dist/tests/fixtures/scripts/lint/eslint-check.js +0 -7
  113. package/dist/tests/fixtures/stashes/load.js +0 -166
  114. package/dist/tests/fixtures/stashes/load.test.js +0 -97
  115. package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +0 -12
  116. package/dist/tests/frontmatter.test.js +0 -190
  117. package/dist/tests/fts-field-weighting.test.js +0 -254
  118. package/dist/tests/fuzzy-search.test.js +0 -230
  119. package/dist/tests/git-provider-clone.test.js +0 -45
  120. package/dist/tests/github.test.js +0 -161
  121. package/dist/tests/graph-boost-ranking.test.js +0 -305
  122. package/dist/tests/graph-extraction.test.js +0 -282
  123. package/dist/tests/helpers/usage-events.js +0 -8
  124. package/dist/tests/index-pass-llm.test.js +0 -161
  125. package/dist/tests/indexer.test.js +0 -570
  126. package/dist/tests/info-command.test.js +0 -166
  127. package/dist/tests/init.test.js +0 -69
  128. package/dist/tests/install-script.test.js +0 -246
  129. package/dist/tests/integration/agent-real-profile.test.js +0 -94
  130. package/dist/tests/issue-36-repro.test.js +0 -304
  131. package/dist/tests/issues-191-194.test.js +0 -160
  132. package/dist/tests/lesson-lint.test.js +0 -111
  133. package/dist/tests/llm-client.test.js +0 -115
  134. package/dist/tests/llm-feature-gate.test.js +0 -151
  135. package/dist/tests/llm.test.js +0 -139
  136. package/dist/tests/lockfile.test.js +0 -216
  137. package/dist/tests/manifest.test.js +0 -205
  138. package/dist/tests/markdown.test.js +0 -126
  139. package/dist/tests/matchers-unit.test.js +0 -189
  140. package/dist/tests/memory-inference.test.js +0 -299
  141. package/dist/tests/merge-scoring.test.js +0 -136
  142. package/dist/tests/metadata.test.js +0 -313
  143. package/dist/tests/migration-help.test.js +0 -89
  144. package/dist/tests/origin-resolve.test.js +0 -124
  145. package/dist/tests/output-baseline.test.js +0 -218
  146. package/dist/tests/output-shapes-unit.test.js +0 -478
  147. package/dist/tests/parallel-search.test.js +0 -272
  148. package/dist/tests/parameter-metadata.test.js +0 -365
  149. package/dist/tests/paths.test.js +0 -177
  150. package/dist/tests/progressive-disclosure.test.js +0 -280
  151. package/dist/tests/proposals.test.js +0 -279
  152. package/dist/tests/proposed-quality.test.js +0 -271
  153. package/dist/tests/provider-registry.test.js +0 -32
  154. package/dist/tests/ranking-regression.test.js +0 -548
  155. package/dist/tests/reflect-propose.test.js +0 -455
  156. package/dist/tests/registry-build-index.test.js +0 -394
  157. package/dist/tests/registry-cli.test.js +0 -290
  158. package/dist/tests/registry-index-v2.test.js +0 -430
  159. package/dist/tests/registry-install.test.js +0 -728
  160. package/dist/tests/registry-providers/parity.test.js +0 -189
  161. package/dist/tests/registry-providers/skills-sh.test.js +0 -309
  162. package/dist/tests/registry-providers/static-index.test.js +0 -238
  163. package/dist/tests/registry-resolve.test.js +0 -126
  164. package/dist/tests/registry-search.test.js +0 -923
  165. package/dist/tests/remember-frontmatter.test.js +0 -378
  166. package/dist/tests/remember-unit.test.js +0 -123
  167. package/dist/tests/ripgrep-install.test.js +0 -251
  168. package/dist/tests/ripgrep-resolve.test.js +0 -108
  169. package/dist/tests/ripgrep.test.js +0 -163
  170. package/dist/tests/save-command.test.js +0 -94
  171. package/dist/tests/save-trust-qa-fixes.test.js +0 -270
  172. package/dist/tests/scoring-pipeline.test.js +0 -648
  173. package/dist/tests/search-include-proposed-cli.test.js +0 -118
  174. package/dist/tests/self-update.test.js +0 -442
  175. package/dist/tests/semantic-search-e2e.test.js +0 -512
  176. package/dist/tests/semantic-status.test.js +0 -471
  177. package/dist/tests/setup-run.integration.js +0 -877
  178. package/dist/tests/setup-wizard.test.js +0 -198
  179. package/dist/tests/setup.test.js +0 -131
  180. package/dist/tests/source-add.test.js +0 -11
  181. package/dist/tests/source-clone.test.js +0 -254
  182. package/dist/tests/source-manage.test.js +0 -366
  183. package/dist/tests/source-providers/filesystem.test.js +0 -82
  184. package/dist/tests/source-providers/git.test.js +0 -252
  185. package/dist/tests/source-providers/website.test.js +0 -128
  186. package/dist/tests/source-qa-fixes.test.js +0 -286
  187. package/dist/tests/source-registry.test.js +0 -350
  188. package/dist/tests/source-resolve.test.js +0 -100
  189. package/dist/tests/source-source.test.js +0 -281
  190. package/dist/tests/source.test.js +0 -533
  191. package/dist/tests/tar-utils-scan.test.js +0 -73
  192. package/dist/tests/toggle-components.test.js +0 -73
  193. package/dist/tests/usage-telemetry.test.js +0 -265
  194. package/dist/tests/utility-scoring.test.js +0 -558
  195. package/dist/tests/vault-load-error.test.js +0 -78
  196. package/dist/tests/vault-qa-fixes.test.js +0 -194
  197. package/dist/tests/vault.test.js +0 -429
  198. package/dist/tests/vector-search.test.js +0 -608
  199. package/dist/tests/walker.test.js +0 -252
  200. package/dist/tests/wave2-cluster-bc.test.js +0 -228
  201. package/dist/tests/wave2-cluster-d.test.js +0 -180
  202. package/dist/tests/wave2-cluster-e.test.js +0 -179
  203. package/dist/tests/wiki-qa-fixes.test.js +0 -270
  204. package/dist/tests/wiki.test.js +0 -529
  205. package/dist/tests/workflow-cli.test.js +0 -271
  206. package/dist/tests/workflow-markdown.test.js +0 -171
  207. package/dist/tests/workflow-path-escape.test.js +0 -132
  208. package/dist/tests/workflow-qa-fixes.test.js +0 -395
  209. package/dist/tests/workflows/indexer-rejection.test.js +0 -213
  210. /package/dist/{src/cli.js → cli.js} +0 -0
  211. /package/dist/{src/commands → commands}/completions.js +0 -0
  212. /package/dist/{src/commands → commands}/config-cli.js +0 -0
  213. /package/dist/{src/commands → commands}/curate.js +0 -0
  214. /package/dist/{src/commands → commands}/distill.js +0 -0
  215. /package/dist/{src/commands → commands}/events.js +0 -0
  216. /package/dist/{src/commands → commands}/history.js +0 -0
  217. /package/dist/{src/commands → commands}/info.js +0 -0
  218. /package/dist/{src/commands → commands}/init.js +0 -0
  219. /package/dist/{src/commands → commands}/install-audit.js +0 -0
  220. /package/dist/{src/commands → commands}/installed-stashes.js +0 -0
  221. /package/dist/{src/commands → commands}/migration-help.js +0 -0
  222. /package/dist/{src/commands → commands}/proposal.js +0 -0
  223. /package/dist/{src/commands → commands}/propose.js +0 -0
  224. /package/dist/{src/commands → commands}/reflect.js +0 -0
  225. /package/dist/{src/commands → commands}/registry-search.js +0 -0
  226. /package/dist/{src/commands → commands}/remember.js +0 -0
  227. /package/dist/{src/commands → commands}/search.js +0 -0
  228. /package/dist/{src/commands → commands}/self-update.js +0 -0
  229. /package/dist/{src/commands → commands}/show.js +0 -0
  230. /package/dist/{src/commands → commands}/source-add.js +0 -0
  231. /package/dist/{src/commands → commands}/source-clone.js +0 -0
  232. /package/dist/{src/commands → commands}/source-manage.js +0 -0
  233. /package/dist/{src/commands → commands}/vault.js +0 -0
  234. /package/dist/{src/core → core}/asset-ref.js +0 -0
  235. /package/dist/{src/core → core}/asset-registry.js +0 -0
  236. /package/dist/{src/core → core}/asset-spec.js +0 -0
  237. /package/dist/{src/core → core}/common.js +0 -0
  238. /package/dist/{src/core → core}/config.js +0 -0
  239. /package/dist/{src/core → core}/errors.js +0 -0
  240. /package/dist/{src/core → core}/events.js +0 -0
  241. /package/dist/{src/core → core}/frontmatter.js +0 -0
  242. /package/dist/{src/core → core}/lesson-lint.js +0 -0
  243. /package/dist/{src/core → core}/markdown.js +0 -0
  244. /package/dist/{src/core → core}/paths.js +0 -0
  245. /package/dist/{src/core → core}/proposals.js +0 -0
  246. /package/dist/{src/core → core}/warn.js +0 -0
  247. /package/dist/{src/core → core}/write-source.js +0 -0
  248. /package/dist/{src/indexer → indexer}/db-search.js +0 -0
  249. /package/dist/{src/indexer → indexer}/db.js +0 -0
  250. /package/dist/{src/indexer → indexer}/file-context.js +0 -0
  251. /package/dist/{src/indexer → indexer}/graph-boost.js +0 -0
  252. /package/dist/{src/indexer → indexer}/graph-extraction.js +0 -0
  253. /package/dist/{src/indexer → indexer}/indexer.js +0 -0
  254. /package/dist/{src/indexer → indexer}/manifest.js +0 -0
  255. /package/dist/{src/indexer → indexer}/matchers.js +0 -0
  256. /package/dist/{src/indexer → indexer}/memory-inference.js +0 -0
  257. /package/dist/{src/indexer → indexer}/metadata.js +0 -0
  258. /package/dist/{src/indexer → indexer}/search-fields.js +0 -0
  259. /package/dist/{src/indexer → indexer}/search-source.js +0 -0
  260. /package/dist/{src/indexer → indexer}/semantic-status.js +0 -0
  261. /package/dist/{src/indexer → indexer}/usage-events.js +0 -0
  262. /package/dist/{src/indexer → indexer}/walker.js +0 -0
  263. /package/dist/{src/integrations → integrations}/agent/config.js +0 -0
  264. /package/dist/{src/integrations → integrations}/agent/detect.js +0 -0
  265. /package/dist/{src/integrations → integrations}/agent/index.js +0 -0
  266. /package/dist/{src/integrations → integrations}/agent/profiles.js +0 -0
  267. /package/dist/{src/integrations → integrations}/agent/prompts.js +0 -0
  268. /package/dist/{src/integrations → integrations}/agent/spawn.js +0 -0
  269. /package/dist/{src/integrations → integrations}/github.js +0 -0
  270. /package/dist/{src/integrations → integrations}/lockfile.js +0 -0
  271. /package/dist/{src/llm → llm}/client.js +0 -0
  272. /package/dist/{src/llm → llm}/embedder.js +0 -0
  273. /package/dist/{src/llm → llm}/embedders/cache.js +0 -0
  274. /package/dist/{src/llm → llm}/embedders/local.js +0 -0
  275. /package/dist/{src/llm → llm}/embedders/remote.js +0 -0
  276. /package/dist/{src/llm → llm}/embedders/types.js +0 -0
  277. /package/dist/{src/llm → llm}/feature-gate.js +0 -0
  278. /package/dist/{src/llm → llm}/graph-extract.js +0 -0
  279. /package/dist/{src/llm → llm}/index-passes.js +0 -0
  280. /package/dist/{src/llm → llm}/memory-infer.js +0 -0
  281. /package/dist/{src/llm → llm}/metadata-enhance.js +0 -0
  282. /package/dist/{src/output → output}/cli-hints.js +0 -0
  283. /package/dist/{src/output → output}/context.js +0 -0
  284. /package/dist/{src/output → output}/renderers.js +0 -0
  285. /package/dist/{src/output → output}/shapes.js +0 -0
  286. /package/dist/{src/output → output}/text.js +0 -0
  287. /package/dist/{src/registry → registry}/build-index.js +0 -0
  288. /package/dist/{src/registry → registry}/create-provider-registry.js +0 -0
  289. /package/dist/{src/registry → registry}/factory.js +0 -0
  290. /package/dist/{src/registry → registry}/origin-resolve.js +0 -0
  291. /package/dist/{src/registry → registry}/providers/index.js +0 -0
  292. /package/dist/{src/registry → registry}/providers/skills-sh.js +0 -0
  293. /package/dist/{src/registry → registry}/providers/static-index.js +0 -0
  294. /package/dist/{src/registry → registry}/providers/types.js +0 -0
  295. /package/dist/{src/registry → registry}/resolve.js +0 -0
  296. /package/dist/{src/registry → registry}/types.js +0 -0
  297. /package/dist/{src/setup → setup}/detect.js +0 -0
  298. /package/dist/{src/setup → setup}/ripgrep-install.js +0 -0
  299. /package/dist/{src/setup → setup}/ripgrep-resolve.js +0 -0
  300. /package/dist/{src/setup → setup}/setup.js +0 -0
  301. /package/dist/{src/setup → setup}/steps.js +0 -0
  302. /package/dist/{src/sources → sources}/include.js +0 -0
  303. /package/dist/{src/sources → sources}/provider-factory.js +0 -0
  304. /package/dist/{src/sources → sources}/provider.js +0 -0
  305. /package/dist/{src/sources → sources}/providers/filesystem.js +0 -0
  306. /package/dist/{src/sources → sources}/providers/git.js +0 -0
  307. /package/dist/{src/sources → sources}/providers/index.js +0 -0
  308. /package/dist/{src/sources → sources}/providers/install-types.js +0 -0
  309. /package/dist/{src/sources → sources}/providers/npm.js +0 -0
  310. /package/dist/{src/sources → sources}/providers/provider-utils.js +0 -0
  311. /package/dist/{src/sources → sources}/providers/sync-from-ref.js +0 -0
  312. /package/dist/{src/sources → sources}/providers/tar-utils.js +0 -0
  313. /package/dist/{src/sources → sources}/providers/website.js +0 -0
  314. /package/dist/{src/sources → sources}/resolve.js +0 -0
  315. /package/dist/{src/sources → sources}/types.js +0 -0
  316. /package/dist/{src/templates → templates}/wiki-templates.js +0 -0
  317. /package/dist/{src/version.js → version.js} +0 -0
  318. /package/dist/{src/wiki → wiki}/wiki.js +0 -0
  319. /package/dist/{src/workflows → workflows}/authoring.js +0 -0
  320. /package/dist/{src/workflows → workflows}/cli.js +0 -0
  321. /package/dist/{src/workflows → workflows}/db.js +0 -0
  322. /package/dist/{src/workflows → workflows}/document-cache.js +0 -0
  323. /package/dist/{src/workflows → workflows}/parser.js +0 -0
  324. /package/dist/{src/workflows → workflows}/renderer.js +0 -0
  325. /package/dist/{src/workflows → workflows}/runs.js +0 -0
  326. /package/dist/{src/workflows → workflows}/schema.js +0 -0
  327. /package/dist/{src/workflows → workflows}/validator.js +0 -0
@@ -1,127 +0,0 @@
1
- /**
2
- * Unit tests for the trajectory parser.
3
- */
4
- import { describe, expect, test } from "bun:test";
5
- import { computeTrajectory, VERIFIER_STDOUT_SCAN_CAP } from "./trajectory";
6
/**
 * Build a minimal run record for trajectory tests.
 *
 * Every field gets a neutral default; any key present in `overrides`
 * replaces the default of the same name (or is added if it is new).
 *
 * @param {object} [overrides] - Fields to replace or add on the default run.
 * @returns {object} A freshly built run record.
 */
function fakeRun(overrides = {}) {
    // Built inside the function so each call gets its own nested objects/arrays.
    const baseline = {
        schemaVersion: 1,
        taskId: "x",
        arm: "akm",
        seed: 0,
        model: "m",
        outcome: "pass",
        tokens: { input: 0, output: 0 },
        wallclockMs: 0,
        trajectory: { correctAssetLoaded: null, feedbackRecorded: null },
        events: [],
        verifierStdout: "",
        verifierExitCode: 0,
        assetsLoaded: [],
    };
    return { ...baseline, ...overrides };
}
24
/**
 * Build a sample event whose `eventType` is `"feedback"`.
 *
 * @returns {object} A new event object on every call.
 */
function feedbackEvent() {
    const sample = {
        schemaVersion: 1,
        id: 0,
        ts: "2026-04-27T00:00:00.000Z",
        eventType: "feedback",
        ref: "skill:foo",
    };
    return sample;
}
33
// Checks how computeTrajectory derives `correctAssetLoaded` from the task's
// goldRef, the run's verifierStdout, and the run's events.
describe("computeTrajectory.correctAssetLoaded", () => {
    // A fresh task object per call, so tests never share a task instance.
    const dockerTask = () => ({ goldRef: "skill:docker-homelab" });
    // Runs the parser on a fakeRun built from `runOverrides` and returns only the flag under test.
    const assetLoaded = (task, runOverrides) => computeTrajectory(task, fakeRun(runOverrides)).correctAssetLoaded;

    test("null when goldRef is missing on the task", () => {
        const flag = assetLoaded({}, { verifierStdout: "akm show skill:irrelevant" });
        expect(flag).toBeNull();
    });

    test("true when verifierStdout contains `akm show <goldRef>`", () => {
        const flag = assetLoaded(dockerTask(), {
            verifierStdout: "tool: akm show skill:docker-homelab\nresult: ok\n",
        });
        expect(flag).toBe(true);
    });

    test("true when tool-call JSON form contains the ref", () => {
        const flag = assetLoaded(dockerTask(), {
            verifierStdout: '{"command":"akm","args":["show","skill:docker-homelab"]}',
        });
        expect(flag).toBe(true);
    });

    test("false when verifierStdout shows a different ref", () => {
        const flag = assetLoaded(dockerTask(), { verifierStdout: "akm show skill:az-cli\n" });
        expect(flag).toBe(false);
    });

    test("false on empty trace", () => {
        const flag = assetLoaded(dockerTask(), { verifierStdout: "" });
        expect(flag).toBe(false);
    });

    test("true when an event metadata.ref carries the goldRef", () => {
        const toolCall = {
            schemaVersion: 1,
            id: 1,
            ts: "2026-04-27T00:00:00.000Z",
            eventType: "tool_call",
            metadata: { ref: "skill:docker-homelab" },
        };
        const flag = assetLoaded(dockerTask(), { events: [toolCall] });
        expect(flag).toBe(true);
    });
});
70
// Checks that `feedbackRecorded` reflects whether the run's events include
// an event of type `feedback`.
describe("computeTrajectory.feedbackRecorded", () => {
    // Runs the parser for a fixed goldRef with the given events and returns the flag under test.
    const recordedFor = (events) => computeTrajectory({ goldRef: "skill:foo" }, fakeRun({ events })).feedbackRecorded;

    test("true when events stream contains a `feedback` event", () => {
        expect(recordedFor([feedbackEvent()])).toBe(true);
    });

    test("false when events stream is empty", () => {
        expect(recordedFor([])).toBe(false);
    });

    test("false when events contain other types but no `feedback`", () => {
        const rememberEvent = {
            schemaVersion: 1,
            id: 0,
            ts: "2026-04-27T00:00:00.000Z",
            eventType: "remember",
            ref: "memory:alpha",
        };
        expect(recordedFor([rememberEvent])).toBe(false);
    });
});
91
// Tests for the verifierStdout scan cap. They expect computeTrajectory to
// honour a ref that appears before VERIFIER_STDOUT_SCAN_CAP, to ignore one
// that appears only after it, and to push exactly one "verifierStdout
// truncated" warning whenever the stdout is longer than the cap.
describe("computeTrajectory verifierStdout cap", () => {
    test("trajectory still computes from the prefix when stdout exceeds the cap, and a warning is recorded", () => {
        // Construct a stdout: prefix has the canonical `akm show` invocation;
        // the rest is GBs-of-junk simulated as a long filler past the cap.
        const ref = "skill:docker-homelab";
        const prefix = `tool: akm show ${ref}\n`;
        const fillerSize = VERIFIER_STDOUT_SCAN_CAP + 1024;
        // Use repeated 'a' so total length comfortably exceeds the cap.
        const filler = "a".repeat(fillerSize);
        const verifierStdout = prefix + filler;
        expect(verifierStdout.length).toBeGreaterThan(VERIFIER_STDOUT_SCAN_CAP);
        const warnings = [];
        const traj = computeTrajectory({ goldRef: ref }, fakeRun({ verifierStdout }), { warnings });
        expect(traj.correctAssetLoaded).toBe(true);
        expect(warnings.length).toBe(1);
        expect(warnings[0]).toContain("verifierStdout truncated");
        expect(warnings[0]).toContain(String(VERIFIER_STDOUT_SCAN_CAP));
    });
    test("no warning when stdout is within the cap", () => {
        const warnings = [];
        computeTrajectory({ goldRef: "skill:foo" }, fakeRun({ verifierStdout: "akm show skill:foo\n" }), { warnings });
        expect(warnings).toEqual([]);
    });
    // Title corrected: the previous wording ("match found in the prefix ...")
    // said the opposite of what this test asserts.
    test("no match when the ref appears only past the cap, and a warning is recorded", () => {
        // Prefix has only filler; the gold ref appears only AFTER the cap.
        // The scan should miss it (correctly — the agent's effective behaviour
        // within the budgeted prefix did not include the show call).
        const ref = "skill:never-loaded";
        const filler = "x".repeat(VERIFIER_STDOUT_SCAN_CAP);
        const verifierStdout = `${filler}akm show ${ref}\n`;
        const warnings = [];
        const traj = computeTrajectory({ goldRef: ref }, fakeRun({ verifierStdout }), { warnings });
        expect(traj.correctAssetLoaded).toBe(false);
        expect(warnings.length).toBe(1);
        expect(warnings[0]).toContain("verifierStdout truncated");
    });
});
@@ -1,114 +0,0 @@
1
- /**
2
- * akm-bench verifier dispatcher (spec §5.3).
3
- *
4
- * • `script` — spawn `<taskDir>/verify.sh` with cwd = workspace.
5
- * • `pytest` — spawn `pytest -q --tb=line` with cwd = workspace.
6
- * • `regex` — match `expected_match` against `agentStdout`.
7
- *
8
- * No LLM-as-judge anywhere. Static dispatch only.
9
- *
10
- * Missing runtime (e.g. `pytest` not on PATH) returns exit code 127 with a
11
- * clear stdout message. The driver maps that to `outcome: "harness_error"`,
12
- * NOT `fail` — a missing tool is not an agent failure.
13
- */
14
- import fs from "node:fs";
15
- import path from "node:path";
16
/**
 * Pick the spawn function used to launch verifier processes.
 *
 * An injected `config.spawn` wins. Otherwise `globalThis.Bun.spawn` is
 * returned, bound to the `Bun` object.
 *
 * @param {object} [config] - Optional verifier config; only `spawn` is read.
 * @returns {Function} The spawn function to call.
 * @throws {Error} When no spawn is injected and `Bun.spawn` is unavailable.
 */
function resolveSpawn(config) {
    const injected = config?.spawn;
    if (injected) {
        return injected;
    }
    const runtime = globalThis.Bun;
    if (runtime?.spawn) {
        return runtime.spawn.bind(runtime);
    }
    throw new Error("Bun.spawn unavailable; pass config.spawn");
}
24
/**
 * Read a stream to completion and return its contents as text.
 *
 * Best-effort: a missing stream or any read failure yields an empty string
 * instead of an error.
 *
 * @param {ReadableStream|null|undefined} stream - Stream to drain, if any.
 * @returns {Promise<string>} The decoded text, or "" when nothing could be read.
 */
async function readStream(stream) {
    if (stream) {
        try {
            return await new Response(stream).text();
        }
        catch {
            // Deliberately ignored: fall through to the empty-string result.
        }
    }
    return "";
}
34
/**
 * Spawn a command and collect its exit code plus its combined output.
 *
 * Both a throwing `spawn` call and a rejected `exited` promise are reported
 * as exit code 127 with an explanatory message rather than propagated.
 *
 * @param {string[]} cmd - Command and arguments handed to `spawn`.
 * @param {string} cwd - Working directory for the child process.
 * @param {Function} spawn - Spawn function; called as `spawn(cmd, options)`.
 * @returns {Promise<{exitCode: number, stdout: string}>} Exit code and output;
 *   stderr, when non-empty, is appended after a `--- stderr ---` separator.
 */
async function runProcess(cmd, cwd, spawn) {
    const messageOf = (err) => (err instanceof Error ? err.message : String(err));
    let child;
    try {
        child = spawn(cmd, { cwd, stdout: "pipe", stderr: "pipe" });
    }
    catch (spawnError) {
        // ENOENT (binary missing) maps to 127 — the conventional "command not found".
        return { exitCode: 127, stdout: `verifier failed to spawn: ${messageOf(spawnError)}` };
    }
    // Begin reading both pipes before waiting for the process to exit.
    const outText = readStream(child.stdout ?? null);
    const errText = readStream(child.stderr ?? null);
    let exitCode;
    try {
        exitCode = await child.exited;
    }
    catch (exitError) {
        return { exitCode: 127, stdout: `verifier exited with error: ${messageOf(exitError)}` };
    }
    const stdout = await outText;
    const stderr = await errText;
    if (!stderr) {
        return { exitCode, stdout };
    }
    // Combine stdout+stderr so the operator sees the full verifier output.
    return { exitCode, stdout: `${stdout}\n--- stderr ---\n${stderr}` };
}
69
/**
 * Run the verifier selected by `kind` and report its exit code and output.
 *
 *  - "script": runs `bash <taskDir>/verify.sh` with cwd = `workspace`;
 *    returns 127 without spawning when the script file does not exist.
 *  - "pytest": runs `pytest -q --tb=line` with cwd = `workspace`, adding
 *    `<taskDir>/tests` as an argument when that directory exists.
 *  - "regex": tests `config.expectedMatch` against `config.agentStdout`;
 *    0 on match, 1 on no match, 127 for a missing or invalid pattern.
 *  - anything else: 127 with an "unknown verifier kind" message.
 *
 * @param {string} taskDir - Directory holding the task's verifier assets.
 * @param {string} workspace - Working directory for spawned verifiers.
 * @param {string} kind - Verifier kind: "script", "pytest" or "regex".
 * @param {object} [config] - Optional `spawn`, `expectedMatch`, `agentStdout`.
 * @returns {Promise<{exitCode: number, stdout: string}>} Verifier result.
 */
export async function runVerifier(taskDir, workspace, kind, config) {
    switch (kind) {
        case "script": {
            const scriptPath = path.join(taskDir, "verify.sh");
            if (fs.existsSync(scriptPath)) {
                return runProcess(["bash", scriptPath], workspace, resolveSpawn(config));
            }
            return { exitCode: 127, stdout: `verify.sh not found at ${scriptPath}` };
        }
        case "pytest": {
            // Test files live at <taskDir>/tests/, not inside the workspace copy.
            // The absolute path is passed so pytest discovers them while running
            // with cwd=workspace (so relative paths used by the tests still resolve).
            const testsDir = path.join(taskDir, "tests");
            const argv = ["pytest", "-q", "--tb=line"];
            if (fs.existsSync(testsDir)) {
                argv.push(testsDir);
            }
            return runProcess(argv, workspace, resolveSpawn(config));
        }
        case "regex": {
            const pattern = config?.expectedMatch;
            const input = config?.agentStdout ?? "";
            if (!pattern) {
                return {
                    exitCode: 127,
                    stdout: 'regex verifier requires "expected_match" in task.yaml',
                };
            }
            let matcher;
            try {
                matcher = new RegExp(pattern);
            }
            catch (compileError) {
                const reason = compileError instanceof Error ? compileError.message : String(compileError);
                return { exitCode: 127, stdout: `invalid regex: ${reason}` };
            }
            if (matcher.test(input)) {
                return { exitCode: 0, stdout: `regex match: ${pattern}` };
            }
            return { exitCode: 1, stdout: `regex did not match: ${pattern}` };
        }
        default:
            // Compiler should refuse to land an unknown kind; runtime guard is belt-and-braces.
            return { exitCode: 127, stdout: `unknown verifier kind: ${String(kind)}` };
    }
}
@@ -1,118 +0,0 @@
1
- /**
2
- * Unit tests for the verifier dispatcher. Covers each of the three
3
- * verifier kinds plus the missing-pytest graceful-127 path.
4
- */
5
- import { afterAll, beforeAll, describe, expect, test } from "bun:test";
6
- import fs from "node:fs";
7
- import path from "node:path";
8
- import { benchMkdtemp } from "./tmp";
9
- import { runVerifier } from "./verifier";
10
// Scratch directory shared by every test in this file: created once before
// the suite runs and removed recursively afterwards.
let scratch;

beforeAll(() => {
    scratch = benchMkdtemp("bench-verifier-test-");
});

afterAll(() => {
    const removal = { recursive: true, force: true };
    fs.rmSync(scratch, removal);
});
17
/**
 * Build a stand-in for a spawn function that never starts a real process.
 *
 * @param {number} exitCode - Exit code the fake process reports.
 * @param {string} [stdout] - Text served on the stdout stream; "" gives a null stream.
 * @param {string} [stderr] - Text served on the stderr stream; "" gives a null stream.
 * @param {*} [throwSync] - When truthy, the returned spawn throws this value instead.
 * @returns {Function} A `(cmd, options)` function returning the fake process object.
 */
function fakeSpawn(exitCode, stdout = "", stderr = "", throwSync) {
    // One-chunk stream carrying `text`, or null when there is nothing to serve.
    const streamOf = (text) => {
        if (!text) {
            return null;
        }
        return new ReadableStream({
            start(controller) {
                controller.enqueue(new TextEncoder().encode(text));
                controller.close();
            },
        });
    };
    return (_cmd, _options) => {
        if (throwSync) {
            throw throwSync;
        }
        return {
            exitCode,
            exited: Promise.resolve(exitCode),
            stdout: streamOf(stdout),
            stderr: streamOf(stderr),
            stdin: null,
            kill() {
                /* noop */
            },
        };
    };
}
48
// Script verifier: verify.sh present → spawn result is returned;
// verify.sh absent → exit code 127 with a "not found" message.
describe("runVerifier — script", () => {
    // Creates <scratch>/<name> as the task dir plus a throwaway workspace under scratch.
    const makeDirs = (name) => {
        const taskDir = path.join(scratch, name);
        fs.mkdirSync(taskDir);
        const workspace = fs.mkdtempSync(path.join(scratch, "ws-"));
        return { taskDir, workspace };
    };

    test("returns exit 0 when verify.sh succeeds", async () => {
        const { taskDir, workspace } = makeDirs("script-pass");
        fs.writeFileSync(path.join(taskDir, "verify.sh"), "");
        const outcome = await runVerifier(taskDir, workspace, "script", {
            spawn: fakeSpawn(0, "ok"),
        });
        expect(outcome.exitCode).toBe(0);
        expect(outcome.stdout).toContain("ok");
    });

    test("returns 127 when verify.sh is missing", async () => {
        const { taskDir, workspace } = makeDirs("script-missing");
        const outcome = await runVerifier(taskDir, workspace, "script", {
            spawn: fakeSpawn(0),
        });
        expect(outcome.exitCode).toBe(127);
        expect(outcome.stdout).toContain("verify.sh not found");
    });
});
71
// Regex verifier: 0 on match, 1 on no match, 127 for a missing or
// uncompilable pattern.
describe("runVerifier — regex", () => {
    // The regex kind spawns nothing, so the scratch dir serves as both taskDir and workspace.
    const verifyRegex = (config) => runVerifier(scratch, scratch, "regex", config);

    test("returns 0 when expected_match matches agent stdout", async () => {
        const { exitCode } = await verifyRegex({
            agentStdout: "the agent printed: hello world",
            expectedMatch: "hello",
        });
        expect(exitCode).toBe(0);
    });

    test("returns 1 when expected_match does not match", async () => {
        const { exitCode } = await verifyRegex({
            agentStdout: "different output",
            expectedMatch: "hello",
        });
        expect(exitCode).toBe(1);
    });

    test("returns 127 when expected_match missing", async () => {
        const outcome = await verifyRegex({
            agentStdout: "anything",
        });
        expect(outcome.exitCode).toBe(127);
        expect(outcome.stdout).toContain("expected_match");
    });

    test("returns 127 on invalid regex pattern", async () => {
        const outcome = await verifyRegex({
            agentStdout: "x",
            expectedMatch: "(",
        });
        expect(outcome.exitCode).toBe(127);
        expect(outcome.stdout).toContain("invalid regex");
    });
});
102
// Pytest verifier: a spawn failure surfaces as 127; otherwise the fake
// process's exit code and output are passed through.
describe("runVerifier — pytest", () => {
    const verifyPytest = (spawn) => runVerifier(scratch, scratch, "pytest", { spawn });

    test("returns 127 with a clear message when pytest is missing", async () => {
        // Simulate ENOENT: spawn throws when bin not on PATH.
        const missingBinary = new Error("ENOENT: pytest not found");
        const outcome = await verifyPytest(fakeSpawn(0, "", "", missingBinary));
        expect(outcome.exitCode).toBe(127);
        expect(outcome.stdout).toContain("ENOENT");
    });

    test("returns the pytest exit code when present", async () => {
        const outcome = await verifyPytest(fakeSpawn(0, "1 passed in 0.05s"));
        expect(outcome.exitCode).toBe(0);
        expect(outcome.stdout).toContain("passed");
    });
});