akm-cli 0.6.1 → 0.7.0-rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. package/CHANGELOG.md +66 -0
  2. package/dist/{cli.js → src/cli.js} +620 -26
  3. package/dist/{commands → src/commands}/config-cli.js +5 -4
  4. package/dist/src/commands/distill.js +283 -0
  5. package/dist/src/commands/events.js +108 -0
  6. package/dist/src/commands/history.js +120 -0
  7. package/dist/{commands → src/commands}/installed-stashes.js +1 -1
  8. package/dist/src/commands/proposal.js +119 -0
  9. package/dist/src/commands/propose.js +171 -0
  10. package/dist/src/commands/reflect.js +193 -0
  11. package/dist/{commands → src/commands}/registry-search.js +2 -1
  12. package/dist/{commands → src/commands}/remember.js +12 -0
  13. package/dist/{commands → src/commands}/search.js +74 -1
  14. package/dist/{commands → src/commands}/self-update.js +4 -3
  15. package/dist/{commands → src/commands}/show.js +44 -0
  16. package/dist/{core → src/core}/asset-ref.js +5 -5
  17. package/dist/{core → src/core}/asset-spec.js +12 -0
  18. package/dist/{core → src/core}/common.js +1 -1
  19. package/dist/{core → src/core}/config.js +175 -121
  20. package/dist/{core → src/core}/errors.js +4 -0
  21. package/dist/src/core/events.js +239 -0
  22. package/dist/src/core/lesson-lint.js +86 -0
  23. package/dist/src/core/proposals.js +406 -0
  24. package/dist/src/core/warn.js +72 -0
  25. package/dist/{core → src/core}/write-source.js +80 -5
  26. package/dist/{indexer → src/indexer}/db-search.js +113 -24
  27. package/dist/{indexer → src/indexer}/db.js +76 -23
  28. package/dist/{indexer → src/indexer}/file-context.js +0 -3
  29. package/dist/src/indexer/graph-boost.js +179 -0
  30. package/dist/src/indexer/graph-extraction.js +212 -0
  31. package/dist/{indexer → src/indexer}/indexer.js +73 -6
  32. package/dist/src/indexer/memory-inference.js +263 -0
  33. package/dist/{indexer → src/indexer}/metadata.js +111 -3
  34. package/dist/src/integrations/agent/config.js +292 -0
  35. package/dist/src/integrations/agent/detect.js +94 -0
  36. package/dist/src/integrations/agent/index.js +17 -0
  37. package/dist/src/integrations/agent/profiles.js +65 -0
  38. package/dist/src/integrations/agent/prompts.js +167 -0
  39. package/dist/src/integrations/agent/spawn.js +221 -0
  40. package/dist/{integrations → src/integrations}/lockfile.js +0 -26
  41. package/dist/{llm → src/llm}/client.js +33 -2
  42. package/dist/src/llm/feature-gate.js +108 -0
  43. package/dist/src/llm/graph-extract.js +107 -0
  44. package/dist/src/llm/index-passes.js +35 -0
  45. package/dist/src/llm/memory-infer.js +86 -0
  46. package/dist/{output → src/output}/renderers.js +60 -1
  47. package/dist/src/output/shapes.js +516 -0
  48. package/dist/{output → src/output}/text.js +447 -4
  49. package/dist/{registry → src/registry}/build-index.js +14 -4
  50. package/dist/{registry → src/registry}/factory.js +0 -8
  51. package/dist/{registry → src/registry}/providers/static-index.js +3 -2
  52. package/dist/{registry → src/registry}/resolve.js +68 -2
  53. package/dist/{setup → src/setup}/setup.js +43 -5
  54. package/dist/{sources → src/sources}/providers/git.js +7 -15
  55. package/dist/tests/add-website-source.test.js +119 -0
  56. package/dist/tests/agent/agent-config-loader.test.js +70 -0
  57. package/dist/tests/agent/agent-config.test.js +221 -0
  58. package/dist/tests/agent/agent-detect.test.js +100 -0
  59. package/dist/tests/agent/agent-spawn.test.js +234 -0
  60. package/dist/tests/agent-output.test.js +186 -0
  61. package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +103 -0
  62. package/dist/tests/architecture/agent-spawn-seam.test.js +193 -0
  63. package/dist/tests/architecture/llm-stateless-seam.test.js +112 -0
  64. package/dist/tests/asset-ref.test.js +192 -0
  65. package/dist/tests/asset-registry.test.js +103 -0
  66. package/dist/tests/asset-spec.test.js +241 -0
  67. package/dist/tests/bench/attribution.test.js +995 -0
  68. package/dist/tests/bench/cleanup-sigint.test.js +83 -0
  69. package/dist/tests/bench/cleanup.js +203 -0
  70. package/dist/tests/bench/cleanup.test.js +166 -0
  71. package/dist/tests/bench/cli.js +683 -0
  72. package/dist/tests/bench/cli.test.js +177 -0
  73. package/dist/tests/bench/compare.test.js +556 -0
  74. package/dist/tests/bench/corpus.js +314 -0
  75. package/dist/tests/bench/corpus.test.js +258 -0
  76. package/dist/tests/bench/driver.js +346 -0
  77. package/dist/tests/bench/driver.test.js +443 -0
  78. package/dist/tests/bench/evolve-metrics.js +179 -0
  79. package/dist/tests/bench/evolve-metrics.test.js +187 -0
  80. package/dist/tests/bench/evolve.js +580 -0
  81. package/dist/tests/bench/evolve.test.js +616 -0
  82. package/dist/tests/bench/failure-modes.test.js +300 -0
  83. package/dist/tests/bench/feedback-integrity.test.js +456 -0
  84. package/dist/tests/bench/leakage.test.js +125 -0
  85. package/dist/tests/bench/learning-curve.test.js +133 -0
  86. package/dist/tests/bench/metrics.js +2319 -0
  87. package/dist/tests/bench/metrics.test.js +1144 -0
  88. package/dist/tests/bench/no-os-tmpdir-invariant.test.js +43 -0
  89. package/dist/tests/bench/report.js +1821 -0
  90. package/dist/tests/bench/report.test.js +989 -0
  91. package/dist/tests/bench/runner.js +536 -0
  92. package/dist/tests/bench/runner.test.js +958 -0
  93. package/dist/tests/bench/search-bridge.test.js +331 -0
  94. package/dist/tests/bench/tmp.js +41 -0
  95. package/dist/tests/bench/trajectory.js +116 -0
  96. package/dist/tests/bench/trajectory.test.js +127 -0
  97. package/dist/tests/bench/verifier.js +109 -0
  98. package/dist/tests/bench/verifier.test.js +118 -0
  99. package/dist/tests/bench/workflow-evaluator.js +557 -0
  100. package/dist/tests/bench/workflow-evaluator.test.js +421 -0
  101. package/dist/tests/bench/workflow-spec.js +358 -0
  102. package/dist/tests/bench/workflow-spec.test.js +363 -0
  103. package/dist/tests/bench/workflow-trace.js +438 -0
  104. package/dist/tests/bench/workflow-trace.test.js +254 -0
  105. package/dist/tests/benchmark-search-quality.js +536 -0
  106. package/dist/tests/benchmark-suite.js +1441 -0
  107. package/dist/tests/capture-cli.test.js +112 -0
  108. package/dist/tests/cli-errors.test.js +203 -0
  109. package/dist/tests/commands/events.test.js +370 -0
  110. package/dist/tests/commands/history.test.js +223 -0
  111. package/dist/tests/commands/import.test.js +103 -0
  112. package/dist/tests/commands/proposal-cli.test.js +209 -0
  113. package/dist/tests/commands/reflect-propose-cli.test.js +333 -0
  114. package/dist/tests/commands/remember.test.js +97 -0
  115. package/dist/tests/commands/scope-flags.test.js +300 -0
  116. package/dist/tests/commands/search.test.js +537 -0
  117. package/dist/tests/commands/show-indexer-parity.test.js +117 -0
  118. package/dist/tests/commands/show.test.js +294 -0
  119. package/dist/tests/common.test.js +266 -0
  120. package/dist/tests/completions.test.js +142 -0
  121. package/dist/tests/config-cli.test.js +193 -0
  122. package/dist/tests/config-llm-features.test.js +139 -0
  123. package/dist/tests/config.test.js +544 -0
  124. package/dist/tests/contracts/migration-baseline.test.js +43 -0
  125. package/dist/tests/contracts/reflect-propose-envelope.test.js +139 -0
  126. package/dist/tests/contracts/spec-helpers.js +46 -0
  127. package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +228 -0
  128. package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +56 -0
  129. package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +34 -0
  130. package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +94 -0
  131. package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +39 -0
  132. package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +44 -0
  133. package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +47 -0
  134. package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +40 -0
  135. package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +58 -0
  136. package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +34 -0
  137. package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +75 -0
  138. package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +36 -0
  139. package/dist/tests/core/write-source.test.js +366 -0
  140. package/dist/tests/curate-command.test.js +87 -0
  141. package/dist/tests/db-scoring.test.js +201 -0
  142. package/dist/tests/db.test.js +654 -0
  143. package/dist/tests/distill-cli-flag.test.js +208 -0
  144. package/dist/tests/distill.test.js +515 -0
  145. package/dist/tests/docker-install.test.js +120 -0
  146. package/dist/tests/e2e.test.js +1398 -0
  147. package/dist/tests/embedder.test.js +340 -0
  148. package/dist/tests/embedding-model-config.test.js +379 -0
  149. package/dist/tests/feedback-command.test.js +172 -0
  150. package/dist/tests/file-context.test.js +552 -0
  151. package/dist/tests/fixtures/scripts/git/summarize-diff.js +9 -0
  152. package/dist/tests/fixtures/scripts/lint/eslint-check.js +7 -0
  153. package/dist/tests/fixtures/stashes/load.js +166 -0
  154. package/dist/tests/fixtures/stashes/load.test.js +88 -0
  155. package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +12 -0
  156. package/dist/tests/frontmatter.test.js +190 -0
  157. package/dist/tests/fts-field-weighting.test.js +254 -0
  158. package/dist/tests/fuzzy-search.test.js +230 -0
  159. package/dist/tests/git-provider-clone.test.js +45 -0
  160. package/dist/tests/github.test.js +161 -0
  161. package/dist/tests/graph-boost-ranking.test.js +305 -0
  162. package/dist/tests/graph-extraction.test.js +282 -0
  163. package/dist/tests/helpers/usage-events.js +8 -0
  164. package/dist/tests/index-pass-llm.test.js +161 -0
  165. package/dist/tests/indexer.test.js +559 -0
  166. package/dist/tests/info-command.test.js +166 -0
  167. package/dist/tests/init.test.js +69 -0
  168. package/dist/tests/install-script.test.js +246 -0
  169. package/dist/tests/integration/agent-real-profile.test.js +94 -0
  170. package/dist/tests/issue-36-repro.test.js +304 -0
  171. package/dist/tests/issues-191-194.test.js +160 -0
  172. package/dist/tests/lesson-lint.test.js +111 -0
  173. package/dist/tests/llm-client.test.js +115 -0
  174. package/dist/tests/llm-feature-gate.test.js +151 -0
  175. package/dist/tests/llm.test.js +139 -0
  176. package/dist/tests/lockfile.test.js +216 -0
  177. package/dist/tests/manifest.test.js +205 -0
  178. package/dist/tests/markdown.test.js +126 -0
  179. package/dist/tests/matchers-unit.test.js +189 -0
  180. package/dist/tests/memory-inference.test.js +299 -0
  181. package/dist/tests/merge-scoring.test.js +136 -0
  182. package/dist/tests/metadata.test.js +313 -0
  183. package/dist/tests/migration-help.test.js +89 -0
  184. package/dist/tests/origin-resolve.test.js +124 -0
  185. package/dist/tests/output-baseline.test.js +217 -0
  186. package/dist/tests/output-shapes-unit.test.js +476 -0
  187. package/dist/tests/parallel-search.test.js +272 -0
  188. package/dist/tests/parameter-metadata.test.js +365 -0
  189. package/dist/tests/paths.test.js +177 -0
  190. package/dist/tests/progressive-disclosure.test.js +280 -0
  191. package/dist/tests/proposals.test.js +279 -0
  192. package/dist/tests/proposed-quality.test.js +271 -0
  193. package/dist/tests/provider-registry.test.js +32 -0
  194. package/dist/tests/ranking-regression.test.js +548 -0
  195. package/dist/tests/reflect-propose.test.js +455 -0
  196. package/dist/tests/registry-build-index.test.js +378 -0
  197. package/dist/tests/registry-cli.test.js +290 -0
  198. package/dist/tests/registry-index-v2.test.js +430 -0
  199. package/dist/tests/registry-install.test.js +728 -0
  200. package/dist/tests/registry-providers/parity.test.js +189 -0
  201. package/dist/tests/registry-providers/skills-sh.test.js +309 -0
  202. package/dist/tests/registry-providers/static-index.test.js +204 -0
  203. package/dist/tests/registry-resolve.test.js +126 -0
  204. package/dist/tests/registry-search.test.js +723 -0
  205. package/dist/tests/remember-frontmatter.test.js +380 -0
  206. package/dist/tests/remember-unit.test.js +123 -0
  207. package/dist/tests/ripgrep-install.test.js +251 -0
  208. package/dist/tests/ripgrep-resolve.test.js +108 -0
  209. package/dist/tests/ripgrep.test.js +163 -0
  210. package/dist/tests/save-command.test.js +94 -0
  211. package/dist/tests/save-trust-qa-fixes.test.js +270 -0
  212. package/dist/tests/scoring-pipeline.test.js +648 -0
  213. package/dist/tests/search-include-proposed-cli.test.js +118 -0
  214. package/dist/tests/self-update.test.js +442 -0
  215. package/dist/tests/semantic-search-e2e.test.js +512 -0
  216. package/dist/tests/semantic-status.test.js +471 -0
  217. package/dist/tests/setup-run.integration.js +877 -0
  218. package/dist/tests/setup-wizard.test.js +198 -0
  219. package/dist/tests/setup.test.js +131 -0
  220. package/dist/tests/source-add.test.js +11 -0
  221. package/dist/tests/source-clone.test.js +254 -0
  222. package/dist/tests/source-manage.test.js +366 -0
  223. package/dist/tests/source-providers/filesystem.test.js +82 -0
  224. package/dist/tests/source-providers/git.test.js +252 -0
  225. package/dist/tests/source-providers/website.test.js +128 -0
  226. package/dist/tests/source-qa-fixes.test.js +268 -0
  227. package/dist/tests/source-registry.test.js +350 -0
  228. package/dist/tests/source-resolve.test.js +100 -0
  229. package/dist/tests/source-source.test.js +221 -0
  230. package/dist/tests/source.test.js +533 -0
  231. package/dist/tests/tar-utils-scan.test.js +73 -0
  232. package/dist/tests/toggle-components.test.js +73 -0
  233. package/dist/tests/usage-telemetry.test.js +265 -0
  234. package/dist/tests/utility-scoring.test.js +558 -0
  235. package/dist/tests/vault-load-error.test.js +78 -0
  236. package/dist/tests/vault-qa-fixes.test.js +194 -0
  237. package/dist/tests/vault.test.js +429 -0
  238. package/dist/tests/vector-search.test.js +608 -0
  239. package/dist/tests/walker.test.js +252 -0
  240. package/dist/tests/wave2-cluster-bc.test.js +228 -0
  241. package/dist/tests/wave2-cluster-d.test.js +180 -0
  242. package/dist/tests/wave2-cluster-e.test.js +179 -0
  243. package/dist/tests/wiki-qa-fixes.test.js +270 -0
  244. package/dist/tests/wiki.test.js +529 -0
  245. package/dist/tests/workflow-cli.test.js +271 -0
  246. package/dist/tests/workflow-markdown.test.js +171 -0
  247. package/dist/tests/workflow-path-escape.test.js +132 -0
  248. package/dist/tests/workflow-qa-fixes.test.js +377 -0
  249. package/dist/tests/workflows/indexer-rejection.test.js +213 -0
  250. package/docs/README.md +8 -0
  251. package/docs/migration/release-notes/0.7.0.md +244 -0
  252. package/package.json +2 -2
  253. package/dist/core/warn.js +0 -27
  254. package/dist/output/shapes.js +0 -212
  255. /package/dist/{commands → src/commands}/completions.js +0 -0
  256. /package/dist/{commands → src/commands}/curate.js +0 -0
  257. /package/dist/{commands → src/commands}/info.js +0 -0
  258. /package/dist/{commands → src/commands}/init.js +0 -0
  259. /package/dist/{commands → src/commands}/install-audit.js +0 -0
  260. /package/dist/{commands → src/commands}/migration-help.js +0 -0
  261. /package/dist/{commands → src/commands}/source-add.js +0 -0
  262. /package/dist/{commands → src/commands}/source-clone.js +0 -0
  263. /package/dist/{commands → src/commands}/source-manage.js +0 -0
  264. /package/dist/{commands → src/commands}/vault.js +0 -0
  265. /package/dist/{core → src/core}/asset-registry.js +0 -0
  266. /package/dist/{core → src/core}/frontmatter.js +0 -0
  267. /package/dist/{core → src/core}/markdown.js +0 -0
  268. /package/dist/{core → src/core}/paths.js +0 -0
  269. /package/dist/{indexer → src/indexer}/manifest.js +0 -0
  270. /package/dist/{indexer → src/indexer}/matchers.js +0 -0
  271. /package/dist/{indexer → src/indexer}/search-fields.js +0 -0
  272. /package/dist/{indexer → src/indexer}/search-source.js +0 -0
  273. /package/dist/{indexer → src/indexer}/semantic-status.js +0 -0
  274. /package/dist/{indexer → src/indexer}/usage-events.js +0 -0
  275. /package/dist/{indexer → src/indexer}/walker.js +0 -0
  276. /package/dist/{integrations → src/integrations}/github.js +0 -0
  277. /package/dist/{llm → src/llm}/embedder.js +0 -0
  278. /package/dist/{llm → src/llm}/embedders/cache.js +0 -0
  279. /package/dist/{llm → src/llm}/embedders/local.js +0 -0
  280. /package/dist/{llm → src/llm}/embedders/remote.js +0 -0
  281. /package/dist/{llm → src/llm}/embedders/types.js +0 -0
  282. /package/dist/{llm → src/llm}/metadata-enhance.js +0 -0
  283. /package/dist/{output → src/output}/cli-hints.js +0 -0
  284. /package/dist/{output → src/output}/context.js +0 -0
  285. /package/dist/{registry → src/registry}/create-provider-registry.js +0 -0
  286. /package/dist/{registry → src/registry}/origin-resolve.js +0 -0
  287. /package/dist/{registry → src/registry}/providers/index.js +0 -0
  288. /package/dist/{registry → src/registry}/providers/skills-sh.js +0 -0
  289. /package/dist/{registry → src/registry}/providers/types.js +0 -0
  290. /package/dist/{registry → src/registry}/types.js +0 -0
  291. /package/dist/{setup → src/setup}/detect.js +0 -0
  292. /package/dist/{setup → src/setup}/ripgrep-install.js +0 -0
  293. /package/dist/{setup → src/setup}/ripgrep-resolve.js +0 -0
  294. /package/dist/{setup → src/setup}/steps.js +0 -0
  295. /package/dist/{sources → src/sources}/include.js +0 -0
  296. /package/dist/{sources → src/sources}/provider-factory.js +0 -0
  297. /package/dist/{sources → src/sources}/provider.js +0 -0
  298. /package/dist/{sources → src/sources}/providers/filesystem.js +0 -0
  299. /package/dist/{sources → src/sources}/providers/index.js +0 -0
  300. /package/dist/{sources → src/sources}/providers/install-types.js +0 -0
  301. /package/dist/{sources → src/sources}/providers/npm.js +0 -0
  302. /package/dist/{sources → src/sources}/providers/provider-utils.js +0 -0
  303. /package/dist/{sources → src/sources}/providers/sync-from-ref.js +0 -0
  304. /package/dist/{sources → src/sources}/providers/tar-utils.js +0 -0
  305. /package/dist/{sources → src/sources}/providers/website.js +0 -0
  306. /package/dist/{sources → src/sources}/resolve.js +0 -0
  307. /package/dist/{sources → src/sources}/types.js +0 -0
  308. /package/dist/{templates → src/templates}/wiki-templates.js +0 -0
  309. /package/dist/{version.js → src/version.js} +0 -0
  310. /package/dist/{wiki → src/wiki}/wiki.js +0 -0
  311. /package/dist/{workflows → src/workflows}/authoring.js +0 -0
  312. /package/dist/{workflows → src/workflows}/cli.js +0 -0
  313. /package/dist/{workflows → src/workflows}/db.js +0 -0
  314. /package/dist/{workflows → src/workflows}/document-cache.js +0 -0
  315. /package/dist/{workflows → src/workflows}/parser.js +0 -0
  316. /package/dist/{workflows → src/workflows}/renderer.js +0 -0
  317. /package/dist/{workflows → src/workflows}/runs.js +0 -0
  318. /package/dist/{workflows → src/workflows}/schema.js +0 -0
  319. /package/dist/{workflows → src/workflows}/validator.js +0 -0
@@ -0,0 +1,109 @@
1
+ /**
2
+ * akm-bench verifier dispatcher (spec §5.3).
3
+ *
4
+ * • `script` — spawn `<taskDir>/verify.sh` with cwd = workspace.
5
+ * • `pytest` — spawn `pytest -q --tb=line` with cwd = workspace.
6
+ * • `regex` — match `expected_match` against `agentStdout`.
7
+ *
8
+ * No LLM-as-judge anywhere. Static dispatch only.
9
+ *
10
+ * Missing runtime (e.g. `pytest` not on PATH) returns exit code 127 with a
11
+ * clear stdout message. The driver maps that to `outcome: "harness_error"`,
12
+ * NOT `fail` — a missing tool is not an agent failure.
13
+ */
14
+ import fs from "node:fs";
15
+ import path from "node:path";
16
+ function resolveSpawn(config) {
17
+ if (config?.spawn)
18
+ return config.spawn;
19
+ const bun = globalThis.Bun;
20
+ if (!bun?.spawn)
21
+ throw new Error("Bun.spawn unavailable; pass config.spawn");
22
+ return bun.spawn.bind(bun);
23
+ }
24
+ async function readStream(stream) {
25
+ if (!stream)
26
+ return "";
27
+ try {
28
+ return await new Response(stream).text();
29
+ }
30
+ catch {
31
+ return "";
32
+ }
33
+ }
34
+ async function runProcess(cmd, cwd, spawn) {
35
+ let proc;
36
+ try {
37
+ proc = spawn(cmd, {
38
+ cwd,
39
+ stdout: "pipe",
40
+ stderr: "pipe",
41
+ });
42
+ }
43
+ catch (err) {
44
+ const message = err instanceof Error ? err.message : String(err);
45
+ // ENOENT (binary missing) maps to 127 — the conventional "command not found".
46
+ return {
47
+ exitCode: 127,
48
+ stdout: `verifier failed to spawn: ${message}`,
49
+ };
50
+ }
51
+ const stdoutPromise = readStream(proc.stdout ?? null);
52
+ const stderrPromise = readStream(proc.stderr ?? null);
53
+ let exitCode;
54
+ try {
55
+ exitCode = await proc.exited;
56
+ }
57
+ catch (err) {
58
+ const message = err instanceof Error ? err.message : String(err);
59
+ return {
60
+ exitCode: 127,
61
+ stdout: `verifier exited with error: ${message}`,
62
+ };
63
+ }
64
+ const [stdout, stderr] = await Promise.all([stdoutPromise, stderrPromise]);
65
+ // Combine stdout+stderr so the operator sees the full verifier output.
66
+ const combined = stderr ? `${stdout}\n--- stderr ---\n${stderr}` : stdout;
67
+ return { exitCode, stdout: combined };
68
+ }
69
+ /**
70
+ * Dispatch a verifier run. Each branch maps a `task.yaml` `verifier:` field
71
+ * onto a deterministic check.
72
+ */
73
+ export async function runVerifier(taskDir, workspace, kind, config) {
74
+ if (kind === "script") {
75
+ const script = path.join(taskDir, "verify.sh");
76
+ if (!fs.existsSync(script)) {
77
+ return { exitCode: 127, stdout: `verify.sh not found at ${script}` };
78
+ }
79
+ return runProcess(["bash", script], workspace, resolveSpawn(config));
80
+ }
81
+ if (kind === "pytest") {
82
+ return runProcess(["pytest", "-q", "--tb=line"], workspace, resolveSpawn(config));
83
+ }
84
+ if (kind === "regex") {
85
+ const pattern = config?.expectedMatch;
86
+ const input = config?.agentStdout ?? "";
87
+ if (!pattern) {
88
+ return {
89
+ exitCode: 127,
90
+ stdout: 'regex verifier requires "expected_match" in task.yaml',
91
+ };
92
+ }
93
+ let regex;
94
+ try {
95
+ regex = new RegExp(pattern);
96
+ }
97
+ catch (err) {
98
+ const message = err instanceof Error ? err.message : String(err);
99
+ return { exitCode: 127, stdout: `invalid regex: ${message}` };
100
+ }
101
+ const matched = regex.test(input);
102
+ return {
103
+ exitCode: matched ? 0 : 1,
104
+ stdout: matched ? `regex match: ${pattern}` : `regex did not match: ${pattern}`,
105
+ };
106
+ }
107
+ // Compiler should refuse to land an unknown kind; runtime guard is belt-and-braces.
108
+ return { exitCode: 127, stdout: `unknown verifier kind: ${String(kind)}` };
109
+ }
@@ -0,0 +1,118 @@
1
+ /**
2
+ * Unit tests for the verifier dispatcher. Covers each of the three
3
+ * verifier kinds plus the missing-pytest graceful-127 path.
4
+ */
5
+ import { afterAll, beforeAll, describe, expect, test } from "bun:test";
6
+ import fs from "node:fs";
7
+ import path from "node:path";
8
+ import { benchMkdtemp } from "./tmp";
9
+ import { runVerifier } from "./verifier";
10
+ let scratch;
11
+ beforeAll(() => {
12
+ scratch = benchMkdtemp("bench-verifier-test-");
13
+ });
14
+ afterAll(() => {
15
+ fs.rmSync(scratch, { recursive: true, force: true });
16
+ });
17
+ function fakeSpawn(exitCode, stdout = "", stderr = "", throwSync) {
18
+ return (_cmd, _options) => {
19
+ if (throwSync)
20
+ throw throwSync;
21
+ const proc = {
22
+ exitCode,
23
+ exited: Promise.resolve(exitCode),
24
+ stdout: stdout
25
+ ? new ReadableStream({
26
+ start(controller) {
27
+ controller.enqueue(new TextEncoder().encode(stdout));
28
+ controller.close();
29
+ },
30
+ })
31
+ : null,
32
+ stderr: stderr
33
+ ? new ReadableStream({
34
+ start(controller) {
35
+ controller.enqueue(new TextEncoder().encode(stderr));
36
+ controller.close();
37
+ },
38
+ })
39
+ : null,
40
+ stdin: null,
41
+ kill() {
42
+ /* noop */
43
+ },
44
+ };
45
+ return proc;
46
+ };
47
+ }
48
+ describe("runVerifier — script", () => {
49
+ test("returns exit 0 when verify.sh succeeds", async () => {
50
+ const taskDir = path.join(scratch, "script-pass");
51
+ fs.mkdirSync(taskDir);
52
+ fs.writeFileSync(path.join(taskDir, "verify.sh"), "");
53
+ const workspace = fs.mkdtempSync(path.join(scratch, "ws-"));
54
+ const result = await runVerifier(taskDir, workspace, "script", {
55
+ spawn: fakeSpawn(0, "ok"),
56
+ });
57
+ expect(result.exitCode).toBe(0);
58
+ expect(result.stdout).toContain("ok");
59
+ });
60
+ test("returns 127 when verify.sh is missing", async () => {
61
+ const taskDir = path.join(scratch, "script-missing");
62
+ fs.mkdirSync(taskDir);
63
+ const workspace = fs.mkdtempSync(path.join(scratch, "ws-"));
64
+ const result = await runVerifier(taskDir, workspace, "script", {
65
+ spawn: fakeSpawn(0),
66
+ });
67
+ expect(result.exitCode).toBe(127);
68
+ expect(result.stdout).toContain("verify.sh not found");
69
+ });
70
+ });
71
+ describe("runVerifier — regex", () => {
72
+ test("returns 0 when expected_match matches agent stdout", async () => {
73
+ const result = await runVerifier(scratch, scratch, "regex", {
74
+ agentStdout: "the agent printed: hello world",
75
+ expectedMatch: "hello",
76
+ });
77
+ expect(result.exitCode).toBe(0);
78
+ });
79
+ test("returns 1 when expected_match does not match", async () => {
80
+ const result = await runVerifier(scratch, scratch, "regex", {
81
+ agentStdout: "different output",
82
+ expectedMatch: "hello",
83
+ });
84
+ expect(result.exitCode).toBe(1);
85
+ });
86
+ test("returns 127 when expected_match missing", async () => {
87
+ const result = await runVerifier(scratch, scratch, "regex", {
88
+ agentStdout: "anything",
89
+ });
90
+ expect(result.exitCode).toBe(127);
91
+ expect(result.stdout).toContain("expected_match");
92
+ });
93
+ test("returns 127 on invalid regex pattern", async () => {
94
+ const result = await runVerifier(scratch, scratch, "regex", {
95
+ agentStdout: "x",
96
+ expectedMatch: "(",
97
+ });
98
+ expect(result.exitCode).toBe(127);
99
+ expect(result.stdout).toContain("invalid regex");
100
+ });
101
+ });
102
+ describe("runVerifier — pytest", () => {
103
+ test("returns 127 with a clear message when pytest is missing", async () => {
104
+ const result = await runVerifier(scratch, scratch, "pytest", {
105
+ // Simulate ENOENT: spawn throws when bin not on PATH.
106
+ spawn: fakeSpawn(0, "", "", new Error("ENOENT: pytest not found")),
107
+ });
108
+ expect(result.exitCode).toBe(127);
109
+ expect(result.stdout).toContain("ENOENT");
110
+ });
111
+ test("returns the pytest exit code when present", async () => {
112
+ const result = await runVerifier(scratch, scratch, "pytest", {
113
+ spawn: fakeSpawn(0, "1 passed in 0.05s"),
114
+ });
115
+ expect(result.exitCode).toBe(0);
116
+ expect(result.stdout).toContain("passed");
117
+ });
118
+ });