akm-cli 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (333) hide show
  1. package/CHANGELOG.md +66 -0
  2. package/dist/{cli.js → src/cli.js} +712 -34
  3. package/dist/{commands → src/commands}/config-cli.js +47 -4
  4. package/dist/src/commands/distill.js +283 -0
  5. package/dist/src/commands/events.js +108 -0
  6. package/dist/src/commands/history.js +191 -0
  7. package/dist/{commands → src/commands}/installed-stashes.js +1 -1
  8. package/dist/src/commands/proposal.js +119 -0
  9. package/dist/src/commands/propose.js +171 -0
  10. package/dist/src/commands/reflect.js +193 -0
  11. package/dist/{commands → src/commands}/registry-search.js +71 -7
  12. package/dist/{commands → src/commands}/remember.js +12 -0
  13. package/dist/{commands → src/commands}/search.js +104 -4
  14. package/dist/{commands → src/commands}/self-update.js +4 -3
  15. package/dist/{commands → src/commands}/show.js +73 -0
  16. package/dist/{commands → src/commands}/source-add.js +5 -1
  17. package/dist/{commands → src/commands}/source-manage.js +7 -1
  18. package/dist/{core → src/core}/asset-ref.js +5 -5
  19. package/dist/{core → src/core}/asset-spec.js +12 -0
  20. package/dist/{core → src/core}/common.js +1 -1
  21. package/dist/{core → src/core}/config.js +203 -121
  22. package/dist/{core → src/core}/errors.js +4 -0
  23. package/dist/src/core/events.js +239 -0
  24. package/dist/src/core/lesson-lint.js +86 -0
  25. package/dist/src/core/proposals.js +406 -0
  26. package/dist/src/core/warn.js +72 -0
  27. package/dist/{core → src/core}/write-source.js +80 -5
  28. package/dist/{indexer → src/indexer}/db-search.js +114 -24
  29. package/dist/{indexer → src/indexer}/db.js +76 -23
  30. package/dist/{indexer → src/indexer}/file-context.js +0 -3
  31. package/dist/src/indexer/graph-boost.js +179 -0
  32. package/dist/src/indexer/graph-extraction.js +212 -0
  33. package/dist/{indexer → src/indexer}/indexer.js +88 -7
  34. package/dist/{indexer → src/indexer}/matchers.js +1 -1
  35. package/dist/src/indexer/memory-inference.js +263 -0
  36. package/dist/{indexer → src/indexer}/metadata.js +111 -3
  37. package/dist/{indexer → src/indexer}/search-source.js +4 -2
  38. package/dist/src/integrations/agent/config.js +292 -0
  39. package/dist/src/integrations/agent/detect.js +94 -0
  40. package/dist/src/integrations/agent/index.js +17 -0
  41. package/dist/src/integrations/agent/profiles.js +65 -0
  42. package/dist/src/integrations/agent/prompts.js +167 -0
  43. package/dist/src/integrations/agent/spawn.js +272 -0
  44. package/dist/{integrations → src/integrations}/github.js +9 -3
  45. package/dist/{integrations → src/integrations}/lockfile.js +0 -26
  46. package/dist/{llm → src/llm}/client.js +33 -2
  47. package/dist/{llm → src/llm}/embedders/remote.js +37 -3
  48. package/dist/src/llm/feature-gate.js +108 -0
  49. package/dist/src/llm/graph-extract.js +107 -0
  50. package/dist/src/llm/index-passes.js +35 -0
  51. package/dist/src/llm/memory-infer.js +86 -0
  52. package/dist/{output → src/output}/cli-hints.js +15 -2
  53. package/dist/{output → src/output}/renderers.js +63 -2
  54. package/dist/src/output/shapes.js +523 -0
  55. package/dist/src/output/text.js +1116 -0
  56. package/dist/{registry → src/registry}/build-index.js +19 -8
  57. package/dist/{registry → src/registry}/factory.js +0 -8
  58. package/dist/{registry → src/registry}/providers/static-index.js +6 -3
  59. package/dist/{registry → src/registry}/resolve.js +68 -2
  60. package/dist/{setup → src/setup}/setup.js +52 -5
  61. package/dist/{sources → src/sources}/providers/git.js +7 -15
  62. package/dist/{wiki → src/wiki}/wiki.js +54 -6
  63. package/dist/{workflows → src/workflows}/runs.js +37 -3
  64. package/dist/tests/add-website-source.test.js +119 -0
  65. package/dist/tests/agent/agent-config-loader.test.js +70 -0
  66. package/dist/tests/agent/agent-config.test.js +221 -0
  67. package/dist/tests/agent/agent-detect.test.js +100 -0
  68. package/dist/tests/agent/agent-spawn.test.js +234 -0
  69. package/dist/tests/agent-output.test.js +186 -0
  70. package/dist/tests/architecture/agent-no-llm-sdk-guard.test.js +103 -0
  71. package/dist/tests/architecture/agent-spawn-seam.test.js +193 -0
  72. package/dist/tests/architecture/llm-stateless-seam.test.js +112 -0
  73. package/dist/tests/asset-ref.test.js +192 -0
  74. package/dist/tests/asset-registry.test.js +103 -0
  75. package/dist/tests/asset-spec.test.js +241 -0
  76. package/dist/tests/bench/attribution.test.js +996 -0
  77. package/dist/tests/bench/cleanup-sigint.test.js +83 -0
  78. package/dist/tests/bench/cleanup.js +234 -0
  79. package/dist/tests/bench/cleanup.test.js +166 -0
  80. package/dist/tests/bench/cli.js +1018 -0
  81. package/dist/tests/bench/cli.test.js +445 -0
  82. package/dist/tests/bench/compare.test.js +556 -0
  83. package/dist/tests/bench/corpus.js +317 -0
  84. package/dist/tests/bench/corpus.test.js +258 -0
  85. package/dist/tests/bench/doctor.js +525 -0
  86. package/dist/tests/bench/driver.js +401 -0
  87. package/dist/tests/bench/driver.test.js +584 -0
  88. package/dist/tests/bench/environment.js +233 -0
  89. package/dist/tests/bench/environment.test.js +199 -0
  90. package/dist/tests/bench/evolve-metrics.js +179 -0
  91. package/dist/tests/bench/evolve-metrics.test.js +187 -0
  92. package/dist/tests/bench/evolve.js +647 -0
  93. package/dist/tests/bench/evolve.test.js +624 -0
  94. package/dist/tests/bench/failure-modes.test.js +349 -0
  95. package/dist/tests/bench/feedback-integrity.test.js +457 -0
  96. package/dist/tests/bench/leakage.test.js +228 -0
  97. package/dist/tests/bench/learning-curve.test.js +134 -0
  98. package/dist/tests/bench/metrics.js +2395 -0
  99. package/dist/tests/bench/metrics.test.js +1150 -0
  100. package/dist/tests/bench/no-os-tmpdir-invariant.test.js +43 -0
  101. package/dist/tests/bench/opencode-config.js +194 -0
  102. package/dist/tests/bench/opencode-config.test.js +370 -0
  103. package/dist/tests/bench/report.js +1885 -0
  104. package/dist/tests/bench/report.test.js +1038 -0
  105. package/dist/tests/bench/run-config.js +355 -0
  106. package/dist/tests/bench/run-config.test.js +298 -0
  107. package/dist/tests/bench/run-curate-test.js +32 -0
  108. package/dist/tests/bench/run-failing-tasks.js +56 -0
  109. package/dist/tests/bench/run-full-bench.js +51 -0
  110. package/dist/tests/bench/run-items36-targeted.js +69 -0
  111. package/dist/tests/bench/run-nano-quick.js +42 -0
  112. package/dist/tests/bench/run-waveg-targeted.js +62 -0
  113. package/dist/tests/bench/runner.js +699 -0
  114. package/dist/tests/bench/runner.test.js +958 -0
  115. package/dist/tests/bench/search-bridge.test.js +331 -0
  116. package/dist/tests/bench/tmp.js +131 -0
  117. package/dist/tests/bench/trajectory.js +116 -0
  118. package/dist/tests/bench/trajectory.test.js +127 -0
  119. package/dist/tests/bench/verifier.js +114 -0
  120. package/dist/tests/bench/verifier.test.js +118 -0
  121. package/dist/tests/bench/workflow-evaluator.js +557 -0
  122. package/dist/tests/bench/workflow-evaluator.test.js +421 -0
  123. package/dist/tests/bench/workflow-spec.js +345 -0
  124. package/dist/tests/bench/workflow-spec.test.js +363 -0
  125. package/dist/tests/bench/workflow-trace.js +472 -0
  126. package/dist/tests/bench/workflow-trace.test.js +254 -0
  127. package/dist/tests/benchmark-search-quality.js +536 -0
  128. package/dist/tests/benchmark-suite.js +1441 -0
  129. package/dist/tests/capture-cli.test.js +112 -0
  130. package/dist/tests/cli-errors.test.js +204 -0
  131. package/dist/tests/commands/events.test.js +370 -0
  132. package/dist/tests/commands/history.test.js +418 -0
  133. package/dist/tests/commands/import.test.js +103 -0
  134. package/dist/tests/commands/proposal-cli.test.js +209 -0
  135. package/dist/tests/commands/reflect-propose-cli.test.js +333 -0
  136. package/dist/tests/commands/remember.test.js +97 -0
  137. package/dist/tests/commands/scope-flags.test.js +300 -0
  138. package/dist/tests/commands/search.test.js +537 -0
  139. package/dist/tests/commands/show-indexer-parity.test.js +117 -0
  140. package/dist/tests/commands/show.test.js +294 -0
  141. package/dist/tests/common.test.js +266 -0
  142. package/dist/tests/completions.test.js +142 -0
  143. package/dist/tests/config-cli.test.js +193 -0
  144. package/dist/tests/config-llm-features.test.js +139 -0
  145. package/dist/tests/config.test.js +569 -0
  146. package/dist/tests/contracts/migration-baseline.test.js +43 -0
  147. package/dist/tests/contracts/reflect-propose-envelope.test.js +139 -0
  148. package/dist/tests/contracts/spec-helpers.js +46 -0
  149. package/dist/tests/contracts/v1-spec-section-11-proposal-queue.test.js +228 -0
  150. package/dist/tests/contracts/v1-spec-section-12-agent-config.test.js +56 -0
  151. package/dist/tests/contracts/v1-spec-section-13-lesson-type.test.js +34 -0
  152. package/dist/tests/contracts/v1-spec-section-14-llm-features.test.js +94 -0
  153. package/dist/tests/contracts/v1-spec-section-4-1-asset-types.test.js +39 -0
  154. package/dist/tests/contracts/v1-spec-section-4-2-quality-rules.test.js +44 -0
  155. package/dist/tests/contracts/v1-spec-section-5-configuration.test.js +47 -0
  156. package/dist/tests/contracts/v1-spec-section-6-orchestration.test.js +40 -0
  157. package/dist/tests/contracts/v1-spec-section-7-module-layout.test.js +58 -0
  158. package/dist/tests/contracts/v1-spec-section-8-extension-points.test.js +34 -0
  159. package/dist/tests/contracts/v1-spec-section-9-4-cli-surface.test.js +75 -0
  160. package/dist/tests/contracts/v1-spec-section-9-7-llm-agent-boundary.test.js +36 -0
  161. package/dist/tests/core/write-source.test.js +366 -0
  162. package/dist/tests/curate-command.test.js +87 -0
  163. package/dist/tests/db-scoring.test.js +201 -0
  164. package/dist/tests/db.test.js +654 -0
  165. package/dist/tests/distill-cli-flag.test.js +208 -0
  166. package/dist/tests/distill.test.js +515 -0
  167. package/dist/tests/docker-install.test.js +120 -0
  168. package/dist/tests/e2e.test.js +1419 -0
  169. package/dist/tests/embedder.test.js +340 -0
  170. package/dist/tests/embedding-model-config.test.js +379 -0
  171. package/dist/tests/feedback-command.test.js +172 -0
  172. package/dist/tests/file-context.test.js +552 -0
  173. package/dist/tests/fixtures/scripts/git/summarize-diff.js +9 -0
  174. package/dist/tests/fixtures/scripts/lint/eslint-check.js +7 -0
  175. package/dist/tests/fixtures/stashes/load.js +166 -0
  176. package/dist/tests/fixtures/stashes/load.test.js +97 -0
  177. package/dist/tests/fixtures/stashes/ranking-baseline/scripts/mem0-search.js +12 -0
  178. package/dist/tests/frontmatter.test.js +190 -0
  179. package/dist/tests/fts-field-weighting.test.js +254 -0
  180. package/dist/tests/fuzzy-search.test.js +230 -0
  181. package/dist/tests/git-provider-clone.test.js +45 -0
  182. package/dist/tests/github.test.js +161 -0
  183. package/dist/tests/graph-boost-ranking.test.js +305 -0
  184. package/dist/tests/graph-extraction.test.js +282 -0
  185. package/dist/tests/helpers/usage-events.js +8 -0
  186. package/dist/tests/index-pass-llm.test.js +161 -0
  187. package/dist/tests/indexer.test.js +570 -0
  188. package/dist/tests/info-command.test.js +166 -0
  189. package/dist/tests/init.test.js +69 -0
  190. package/dist/tests/install-script.test.js +246 -0
  191. package/dist/tests/integration/agent-real-profile.test.js +94 -0
  192. package/dist/tests/issue-36-repro.test.js +304 -0
  193. package/dist/tests/issues-191-194.test.js +160 -0
  194. package/dist/tests/lesson-lint.test.js +111 -0
  195. package/dist/tests/llm-client.test.js +115 -0
  196. package/dist/tests/llm-feature-gate.test.js +151 -0
  197. package/dist/tests/llm.test.js +139 -0
  198. package/dist/tests/lockfile.test.js +216 -0
  199. package/dist/tests/manifest.test.js +205 -0
  200. package/dist/tests/markdown.test.js +126 -0
  201. package/dist/tests/matchers-unit.test.js +189 -0
  202. package/dist/tests/memory-inference.test.js +299 -0
  203. package/dist/tests/merge-scoring.test.js +136 -0
  204. package/dist/tests/metadata.test.js +313 -0
  205. package/dist/tests/migration-help.test.js +89 -0
  206. package/dist/tests/origin-resolve.test.js +124 -0
  207. package/dist/tests/output-baseline.test.js +218 -0
  208. package/dist/tests/output-shapes-unit.test.js +478 -0
  209. package/dist/tests/parallel-search.test.js +272 -0
  210. package/dist/tests/parameter-metadata.test.js +365 -0
  211. package/dist/tests/paths.test.js +177 -0
  212. package/dist/tests/progressive-disclosure.test.js +280 -0
  213. package/dist/tests/proposals.test.js +279 -0
  214. package/dist/tests/proposed-quality.test.js +271 -0
  215. package/dist/tests/provider-registry.test.js +32 -0
  216. package/dist/tests/ranking-regression.test.js +548 -0
  217. package/dist/tests/reflect-propose.test.js +455 -0
  218. package/dist/tests/registry-build-index.test.js +394 -0
  219. package/dist/tests/registry-cli.test.js +290 -0
  220. package/dist/tests/registry-index-v2.test.js +430 -0
  221. package/dist/tests/registry-install.test.js +728 -0
  222. package/dist/tests/registry-providers/parity.test.js +189 -0
  223. package/dist/tests/registry-providers/skills-sh.test.js +309 -0
  224. package/dist/tests/registry-providers/static-index.test.js +238 -0
  225. package/dist/tests/registry-resolve.test.js +126 -0
  226. package/dist/tests/registry-search.test.js +923 -0
  227. package/dist/tests/remember-frontmatter.test.js +378 -0
  228. package/dist/tests/remember-unit.test.js +123 -0
  229. package/dist/tests/ripgrep-install.test.js +251 -0
  230. package/dist/tests/ripgrep-resolve.test.js +108 -0
  231. package/dist/tests/ripgrep.test.js +163 -0
  232. package/dist/tests/save-command.test.js +94 -0
  233. package/dist/tests/save-trust-qa-fixes.test.js +270 -0
  234. package/dist/tests/scoring-pipeline.test.js +648 -0
  235. package/dist/tests/search-include-proposed-cli.test.js +118 -0
  236. package/dist/tests/self-update.test.js +442 -0
  237. package/dist/tests/semantic-search-e2e.test.js +512 -0
  238. package/dist/tests/semantic-status.test.js +471 -0
  239. package/dist/tests/setup-run.integration.js +877 -0
  240. package/dist/tests/setup-wizard.test.js +198 -0
  241. package/dist/tests/setup.test.js +131 -0
  242. package/dist/tests/source-add.test.js +11 -0
  243. package/dist/tests/source-clone.test.js +254 -0
  244. package/dist/tests/source-manage.test.js +366 -0
  245. package/dist/tests/source-providers/filesystem.test.js +82 -0
  246. package/dist/tests/source-providers/git.test.js +252 -0
  247. package/dist/tests/source-providers/website.test.js +128 -0
  248. package/dist/tests/source-qa-fixes.test.js +286 -0
  249. package/dist/tests/source-registry.test.js +350 -0
  250. package/dist/tests/source-resolve.test.js +100 -0
  251. package/dist/tests/source-source.test.js +281 -0
  252. package/dist/tests/source.test.js +533 -0
  253. package/dist/tests/tar-utils-scan.test.js +73 -0
  254. package/dist/tests/toggle-components.test.js +73 -0
  255. package/dist/tests/usage-telemetry.test.js +265 -0
  256. package/dist/tests/utility-scoring.test.js +558 -0
  257. package/dist/tests/vault-load-error.test.js +78 -0
  258. package/dist/tests/vault-qa-fixes.test.js +194 -0
  259. package/dist/tests/vault.test.js +429 -0
  260. package/dist/tests/vector-search.test.js +608 -0
  261. package/dist/tests/walker.test.js +252 -0
  262. package/dist/tests/wave2-cluster-bc.test.js +228 -0
  263. package/dist/tests/wave2-cluster-d.test.js +180 -0
  264. package/dist/tests/wave2-cluster-e.test.js +179 -0
  265. package/dist/tests/wiki-qa-fixes.test.js +270 -0
  266. package/dist/tests/wiki.test.js +529 -0
  267. package/dist/tests/workflow-cli.test.js +271 -0
  268. package/dist/tests/workflow-markdown.test.js +171 -0
  269. package/dist/tests/workflow-path-escape.test.js +132 -0
  270. package/dist/tests/workflow-qa-fixes.test.js +395 -0
  271. package/dist/tests/workflows/indexer-rejection.test.js +213 -0
  272. package/docs/README.md +8 -0
  273. package/docs/migration/release-notes/0.7.0.md +244 -0
  274. package/package.json +2 -2
  275. package/dist/core/warn.js +0 -27
  276. package/dist/output/shapes.js +0 -212
  277. package/dist/output/text.js +0 -520
  278. /package/dist/{commands → src/commands}/completions.js +0 -0
  279. /package/dist/{commands → src/commands}/curate.js +0 -0
  280. /package/dist/{commands → src/commands}/info.js +0 -0
  281. /package/dist/{commands → src/commands}/init.js +0 -0
  282. /package/dist/{commands → src/commands}/install-audit.js +0 -0
  283. /package/dist/{commands → src/commands}/migration-help.js +0 -0
  284. /package/dist/{commands → src/commands}/source-clone.js +0 -0
  285. /package/dist/{commands → src/commands}/vault.js +0 -0
  286. /package/dist/{core → src/core}/asset-registry.js +0 -0
  287. /package/dist/{core → src/core}/frontmatter.js +0 -0
  288. /package/dist/{core → src/core}/markdown.js +0 -0
  289. /package/dist/{core → src/core}/paths.js +0 -0
  290. /package/dist/{indexer → src/indexer}/manifest.js +0 -0
  291. /package/dist/{indexer → src/indexer}/search-fields.js +0 -0
  292. /package/dist/{indexer → src/indexer}/semantic-status.js +0 -0
  293. /package/dist/{indexer → src/indexer}/usage-events.js +0 -0
  294. /package/dist/{indexer → src/indexer}/walker.js +0 -0
  295. /package/dist/{llm → src/llm}/embedder.js +0 -0
  296. /package/dist/{llm → src/llm}/embedders/cache.js +0 -0
  297. /package/dist/{llm → src/llm}/embedders/local.js +0 -0
  298. /package/dist/{llm → src/llm}/embedders/types.js +0 -0
  299. /package/dist/{llm → src/llm}/metadata-enhance.js +0 -0
  300. /package/dist/{output → src/output}/context.js +0 -0
  301. /package/dist/{registry → src/registry}/create-provider-registry.js +0 -0
  302. /package/dist/{registry → src/registry}/origin-resolve.js +0 -0
  303. /package/dist/{registry → src/registry}/providers/index.js +0 -0
  304. /package/dist/{registry → src/registry}/providers/skills-sh.js +0 -0
  305. /package/dist/{registry → src/registry}/providers/types.js +0 -0
  306. /package/dist/{registry → src/registry}/types.js +0 -0
  307. /package/dist/{setup → src/setup}/detect.js +0 -0
  308. /package/dist/{setup → src/setup}/ripgrep-install.js +0 -0
  309. /package/dist/{setup → src/setup}/ripgrep-resolve.js +0 -0
  310. /package/dist/{setup → src/setup}/steps.js +0 -0
  311. /package/dist/{sources → src/sources}/include.js +0 -0
  312. /package/dist/{sources → src/sources}/provider-factory.js +0 -0
  313. /package/dist/{sources → src/sources}/provider.js +0 -0
  314. /package/dist/{sources → src/sources}/providers/filesystem.js +0 -0
  315. /package/dist/{sources → src/sources}/providers/index.js +0 -0
  316. /package/dist/{sources → src/sources}/providers/install-types.js +0 -0
  317. /package/dist/{sources → src/sources}/providers/npm.js +0 -0
  318. /package/dist/{sources → src/sources}/providers/provider-utils.js +0 -0
  319. /package/dist/{sources → src/sources}/providers/sync-from-ref.js +0 -0
  320. /package/dist/{sources → src/sources}/providers/tar-utils.js +0 -0
  321. /package/dist/{sources → src/sources}/providers/website.js +0 -0
  322. /package/dist/{sources → src/sources}/resolve.js +0 -0
  323. /package/dist/{sources → src/sources}/types.js +0 -0
  324. /package/dist/{templates → src/templates}/wiki-templates.js +0 -0
  325. /package/dist/{version.js → src/version.js} +0 -0
  326. /package/dist/{workflows → src/workflows}/authoring.js +0 -0
  327. /package/dist/{workflows → src/workflows}/cli.js +0 -0
  328. /package/dist/{workflows → src/workflows}/db.js +0 -0
  329. /package/dist/{workflows → src/workflows}/document-cache.js +0 -0
  330. /package/dist/{workflows → src/workflows}/parser.js +0 -0
  331. /package/dist/{workflows → src/workflows}/renderer.js +0 -0
  332. /package/dist/{workflows → src/workflows}/schema.js +0 -0
  333. /package/dist/{workflows → src/workflows}/validator.js +0 -0
@@ -0,0 +1,421 @@
1
+ /**
2
+ * Tests for the workflow compliance evaluator (issue #256).
3
+ *
4
+ * Cases covered (per acceptance criteria):
5
+ * - pass, partial, fail, not_applicable, harness_error
6
+ * - wrong-order, missing-event, forbidden-event
7
+ * - wrong-feedback-polarity, irrelevant-asset-loaded
8
+ * - violation cap, schemaVersion stability, applies_to filter,
9
+ * evaluateRunAgainstAllSpecs orchestration, pure-function guarantees.
10
+ */
11
+ import { describe, expect, test } from "bun:test";
12
+ import { evaluateRunAgainstAllSpecs, evaluateRunAgainstSpec, MAX_VIOLATIONS_PER_CHECK, } from "./workflow-evaluator";
13
+ /* ── Fixtures ─────────────────────────────────────────────────────────────── */
14
/**
 * Build a run-record fixture for the evaluator tests.
 *
 * Defaults to a passing run on the "akm" arm for task
 * "docker-homelab/redis" with seed 1.
 *
 * @param {object} [overrides] - Keys that replace or extend the defaults.
 * @returns {object} A fresh run object.
 */
function makeRun(overrides = {}) {
    const defaults = {
        arm: "akm",
        taskId: "docker-homelab/redis",
        seed: 1,
        outcome: "pass",
    };
    return Object.assign({}, defaults, overrides);
}
17
// Module-level counter supplying default event ids; it only ever increases.
let nextEventId = 0;
/**
 * Build a single trace-event fixture.
 *
 * @param {string} type - Event type, e.g. "akm_search".
 * @param {object} [extra] - Extra fields merged over the defaults. A non-nullish
 *   `extra.id` is used as the event id; otherwise the next counter value is used.
 * @returns {object} The event object.
 */
function ev(type, extra = {}) {
    // Pull `id` out of the spread so an explicit `id: undefined` / `id: null`
    // in `extra` cannot clobber the generated id.
    const { id: requestedId, ...fields } = extra;
    return {
        id: requestedId ?? nextEventId++,
        taskId: "docker-homelab/redis",
        arm: "akm",
        seed: 1,
        type,
        source: "akm_events",
        ...fields,
    };
}
29
/**
 * Wrap a list of events in a trace fixture.
 *
 * Every event is shallow-copied and its `id` is overwritten with its array
 * index, so the array order is the canonical "first occurrence" order. The
 * input events are not mutated.
 *
 * @param {object[]} events - Events in the order they should appear.
 * @param {object} [overrides] - Keys that replace or extend the trace defaults.
 * @returns {object} The trace object.
 */
function makeTrace(events, overrides = {}) {
    const stamped = events.map((event, position) => {
        const copy = { ...event };
        copy.id = position;
        return copy;
    });
    const trace = {
        schemaVersion: 1,
        taskId: "docker-homelab/redis",
        arm: "akm",
        seed: 1,
        events: stamped,
        truncated: false,
    };
    return Object.assign(trace, overrides);
}
42
/**
 * Build a workflow-spec fixture.
 *
 * By default the spec requires agent_started → akm_search (before the first
 * workspace write) → first_workspace_write → agent_finished, and forbids a
 * workspace write before akm_search.
 *
 * @param {object} [overrides] - Keys that replace or extend the defaults. A
 *   nullish `required_sequence` or `forbidden` falls back to the default list.
 * @returns {object} The spec object.
 */
function makeSpec(overrides = {}) {
    // Split out the two list-valued keys so their nullish fallbacks always win;
    // every other override is spread over the scalar defaults below.
    const { required_sequence: requiredOverride, forbidden: forbiddenOverride, ...rest } = overrides;
    const required = requiredOverride ?? [
        { event: "agent_started" },
        { event: "akm_search", before: "first_workspace_write" },
        { event: "first_workspace_write" },
        { event: "agent_finished" },
    ];
    const forbidden = forbiddenOverride ?? [
        { event: "first_workspace_write", before: "akm_search" },
    ];
    return {
        id: "test-spec",
        title: "Test spec",
        required_sequence: required,
        scoring: { required_steps_weight: 0.6, forbidden_steps_weight: 0.2, evidence_quality_weight: 0.2 },
        sourcePath: "/virtual/test-spec.yaml",
        forbidden,
        ...rest,
    };
}
63
+ /* ── Status: pass ─────────────────────────────────────────────────────────── */
64
// Happy path: a trace containing every default required step in order, with
// no forbidden event, must be reported as a clean pass.
describe("evaluateRunAgainstSpec — pass", () => {
    test("all required steps present, no forbidden, in order", () => {
        const events = [
            ev("agent_started"),
            ev("akm_search"),
            ev("first_workspace_write"),
            ev("verifier_run", { exitCode: 0 }),
            ev("agent_finished"),
        ];
        const outcome = evaluateRunAgainstSpec(makeTrace(events), makeSpec(), makeRun());
        const { status, requiredPassed, requiredTotal, violations, score, schemaVersion, workflowId } = outcome;
        expect(status).toBe("pass");
        expect(requiredPassed).toBe(requiredTotal);
        expect(violations).toEqual([]);
        expect(score).toBeGreaterThan(0.8);
        expect(schemaVersion).toBe(1);
        expect(workflowId).toBe("test-spec");
    });
});
82
+ /* ── Status: missing required event ───────────────────────────────────────── */
83
// Missing required steps: a partially satisfied sequence is "partial", an
// empty trace (no required step satisfied) is "fail".
describe("evaluateRunAgainstSpec — missing required event", () => {
    test("flags missing_required_event when akm_search absent", () => {
        const eventTypes = ["agent_started", "first_workspace_write", "agent_finished"];
        const trace = makeTrace(eventTypes.map((type) => ev(type)));
        const outcome = evaluateRunAgainstSpec(trace, makeSpec(), makeRun());
        // Some required steps still passed, so this is not a full failure.
        expect(outcome.status).toBe("partial");
        const violationCodes = outcome.violations.map(({ code }) => code);
        expect(violationCodes).toContain("missing_required_event");
        expect(outcome.requiredPassed).toBeLessThan(outcome.requiredTotal);
    });
    test("status=fail when zero required steps pass", () => {
        const emptyTrace = makeTrace([]);
        const { status, requiredPassed } = evaluateRunAgainstSpec(emptyTrace, makeSpec(), makeRun());
        expect(status).toBe("fail");
        expect(requiredPassed).toBe(0);
    });
});
99
+ /* ── Status: wrong order ──────────────────────────────────────────────────── */
100
// Ordering constraints: both `before` (default spec) and `after` guards must
// produce a wrong_order violation when the events appear the other way round.
describe("evaluateRunAgainstSpec — wrong order", () => {
    test("flags wrong_order when first_workspace_write precedes akm_search", () => {
        const eventTypes = ["agent_started", "first_workspace_write", "akm_search", "agent_finished"];
        const trace = makeTrace(eventTypes.map((type) => ev(type)));
        const outcome = evaluateRunAgainstSpec(trace, makeSpec(), makeRun());
        const violationCodes = outcome.violations.map(({ code }) => code);
        expect(violationCodes).toContain("wrong_order");
        expect(outcome.status).not.toBe("pass");
    });
    test("step.after order check fires when this-event precedes guard", () => {
        const requiredSteps = [
            { event: "agent_started" },
            { event: "akm_feedback", after: "verifier_run" },
            { event: "verifier_run" },
            { event: "agent_finished" },
        ];
        const spec = makeSpec({ required_sequence: requiredSteps, forbidden: [] });
        // Feedback is recorded before the verifier runs, violating the `after` guard.
        const events = [
            ev("agent_started"),
            ev("akm_feedback", { args: ["+1", "skill:foo"] }),
            ev("verifier_run", { exitCode: 0 }),
            ev("agent_finished"),
        ];
        const outcome = evaluateRunAgainstSpec(makeTrace(events), spec, makeRun());
        const sawWrongOrder = outcome.violations.some(({ code }) => code === "wrong_order");
        expect(sawWrongOrder).toBe(true);
    });
});
128
+ /* ── Status: forbidden event ──────────────────────────────────────────────── */
129
// Forbidden steps: an unconditional forbidden event, plus two conditional
// cases that are expected to map to more specific violation codes.
describe("evaluateRunAgainstSpec — forbidden event", () => {
    // Minimal required sequence shared by every test in this group.
    const startAndFinish = () => [{ event: "agent_started" }, { event: "agent_finished" }];
    // True when the result carries at least one violation with the given code.
    const hasViolation = (outcome, wanted) => outcome.violations.some(({ code }) => code === wanted);

    test("flags forbidden_event for unconditional forbidden step", () => {
        const spec = makeSpec({
            required_sequence: startAndFinish(),
            forbidden: [{ event: "akm_distill" }],
        });
        const events = [ev("agent_started"), ev("akm_distill"), ev("agent_finished")];
        const outcome = evaluateRunAgainstSpec(makeTrace(events), spec, makeRun());
        const violationCodes = outcome.violations.map(({ code }) => code);
        expect(violationCodes).toContain("forbidden_event");
    });
    test("classifies reflection_without_failure for akm_reflect before feedback", () => {
        const spec = makeSpec({
            required_sequence: startAndFinish(),
            forbidden: [{ event: "akm_reflect", before: "akm_feedback" }],
        });
        const events = [
            ev("agent_started"),
            ev("akm_reflect"),
            ev("akm_feedback", { args: ["-1", "skill:foo"] }),
            ev("agent_finished"),
        ];
        const outcome = evaluateRunAgainstSpec(makeTrace(events), spec, makeRun());
        expect(hasViolation(outcome, "reflection_without_failure")).toBe(true);
    });
    test("classifies proposal_accepted_without_validation", () => {
        const spec = makeSpec({
            required_sequence: startAndFinish(),
            forbidden: [{ event: "akm_proposal_accept", before: "verifier_run" }],
        });
        const events = [
            ev("agent_started"),
            ev("akm_proposal_accept"),
            ev("verifier_run", { exitCode: 0 }),
            ev("agent_finished"),
        ];
        const outcome = evaluateRunAgainstSpec(makeTrace(events), spec, makeRun());
        expect(hasViolation(outcome, "proposal_accepted_without_validation")).toBe(true);
    });
});
169
+ /* ── Wrong feedback polarity ──────────────────────────────────────────────── */
170
// Feedback polarity: steps may constrain akm_feedback to "positive" or
// "negative"; the trace carries polarity in the event args ("+1" / "-1").
describe("evaluateRunAgainstSpec — wrong feedback polarity", () => {
    test("verifier failed but agent recorded only positive feedback", () => {
        const requiredSteps = [
            { event: "agent_started" },
            { event: "akm_feedback", polarity: "negative" },
            { event: "agent_finished" },
        ];
        const spec = makeSpec({
            required_sequence: requiredSteps,
            forbidden: [{ event: "akm_feedback", polarity: "positive" }],
        });
        const events = [
            ev("agent_started"),
            ev("akm_feedback", { args: ["+1", "skill:foo"] }),
            ev("verifier_run", { exitCode: 1 }),
            ev("agent_finished"),
        ];
        const failedRun = makeRun({ outcome: "fail", verifierFailed: true });
        const outcome = evaluateRunAgainstSpec(makeTrace(events), spec, failedRun);
        const violationCodes = outcome.violations.map(({ code }) => code);
        expect(violationCodes).toContain("wrong_feedback_polarity");
        // The negative-polarity required step should also be missing.
        expect(violationCodes).toContain("missing_required_event");
        expect(outcome.status).not.toBe("pass");
    });
    test("polarity: positive step matches +1 args", () => {
        const requiredSteps = [
            { event: "agent_started" },
            { event: "akm_feedback", polarity: "positive" },
            { event: "agent_finished" },
        ];
        const spec = makeSpec({ required_sequence: requiredSteps, forbidden: [] });
        const events = [
            ev("agent_started"),
            ev("akm_feedback", { args: ["+1", "skill:foo"] }),
            ev("verifier_run", { exitCode: 0 }),
            ev("agent_finished"),
        ];
        const { status } = evaluateRunAgainstSpec(makeTrace(events), spec, makeRun());
        expect(status).toBe("pass");
    });
});
212
+ /* ── Irrelevant asset loaded ──────────────────────────────────────────────── */
213
// Gold-ref checks: a step with `ref_must_equal: "gold_ref"` compares the
// akm_show event's assetRef against the task's goldRef.
describe("evaluateRunAgainstSpec — irrelevant_asset_loaded", () => {
    // Every test here uses the same spec; build a fresh copy per test.
    const buildGoldShowSpec = () => makeSpec({
        required_sequence: [
            { event: "agent_started" },
            { event: "akm_show", ref_must_equal: "gold_ref" },
            { event: "agent_finished" },
        ],
        forbidden: [],
    });

    test("flags when akm_show ref doesn't match gold_ref", () => {
        const spec = buildGoldShowSpec();
        const events = [ev("agent_started"), ev("akm_show", { assetRef: "skill:wrong" }), ev("agent_finished")];
        const trace = makeTrace(events);
        const task = { goldRef: "skill:deploy" };
        const outcome = evaluateRunAgainstSpec(trace, spec, makeRun(), task);
        const mismatch = outcome.violations.find(({ code }) => code === "irrelevant_asset_loaded");
        expect(mismatch).toBeDefined();
        expect(mismatch?.expected).toBe("skill:deploy");
        expect(mismatch?.observed).toBe("skill:wrong");
    });
    test("top-level gold-ref check fires when spec cares but akm_show never loaded gold", () => {
        const spec = buildGoldShowSpec();
        const trace = makeTrace([ev("agent_started"), ev("agent_finished")]);
        const outcome = evaluateRunAgainstSpec(trace, spec, makeRun(), { goldRef: "skill:deploy" });
        // Either the per-step `irrelevant_asset_loaded` OR the spec-level gold check should fire.
        const acceptable = ["irrelevant_asset_loaded", "missing_required_event"];
        const fired = outcome.violations
            .map(({ code }) => code)
            .some((code) => acceptable.includes(code));
        expect(fired).toBe(true);
    });
    test("passes when akm_show loads the gold_ref", () => {
        const spec = buildGoldShowSpec();
        const events = [ev("agent_started"), ev("akm_show", { assetRef: "skill:deploy" }), ev("agent_finished")];
        const outcome = evaluateRunAgainstSpec(makeTrace(events), spec, makeRun(), { goldRef: "skill:deploy" });
        expect(outcome.status).toBe("pass");
        expect(outcome.evidence.goldAssetLoaded).toBe(true);
    });
});
261
+ /* ── applies_to filter / not_applicable ───────────────────────────────────── */
262
// applies_to filtering: specs whose filter excludes the run are reported as
// not_applicable (one result per spec) rather than being evaluated.
describe("evaluateRunAgainstAllSpecs — applies_to", () => {
    // Event types that satisfy the default spec's required sequence.
    const passingTypes = ["agent_started", "akm_search", "first_workspace_write", "agent_finished"];

    test("returns not_applicable when arm filter excludes the run", () => {
        const spec = makeSpec({ applies_to: { arms: ["control"] } });
        const trace = makeTrace([ev("agent_started"), ev("agent_finished")]);
        const outcomes = evaluateRunAgainstAllSpecs(trace, [spec], makeRun({ arm: "akm" }));
        expect(outcomes).toHaveLength(1);
        const [only] = outcomes;
        expect(only.status).toBe("not_applicable");
        expect(only.violations).toEqual([]);
        expect(only.score).toBe(0);
    });
    test("evaluates spec when applies_to matches", () => {
        const spec = makeSpec({ applies_to: { arms: ["akm"] } });
        const trace = makeTrace(passingTypes.map((type) => ev(type)));
        const outcomes = evaluateRunAgainstAllSpecs(trace, [spec], makeRun());
        expect(outcomes).toHaveLength(1);
        expect(outcomes[0].status).toBe("pass");
    });
    test("requires_gold_ref filter checks task.goldRef", () => {
        const spec = makeSpec({ applies_to: { requires_gold_ref: true } });
        const trace = makeTrace(passingTypes.map((type) => ev(type)));
        // No goldRef → not_applicable.
        const withoutGold = evaluateRunAgainstAllSpecs(trace, [spec], makeRun());
        expect(withoutGold[0].status).toBe("not_applicable");
        // With goldRef → applies.
        const withGold = evaluateRunAgainstAllSpecs(trace, [spec], makeRun(), { goldRef: "skill:deploy" });
        expect(withGold[0].status).not.toBe("not_applicable");
    });
});
290
+ /* ── Harness error ────────────────────────────────────────────────────────── */
291
describe("evaluateRunAgainstSpec — harness_error", () => {
  // Malformed inputs must be reported as a `harness_error` result, not thrown.
  test("malformed trace yields harness_error and does not throw", () => {
    const result = evaluateRunAgainstSpec(undefined, makeSpec(), makeRun());
    expect(result.status).toBe("harness_error");
    expect(result.violations[0].code).toBe("missing_evidence");
    expect(result.score).toBe(0);
    expect(result.schemaVersion).toBe(1);
  });

  test("malformed spec yields harness_error", () => {
    const trace = makeTrace([ev("agent_started")]);
    // @ts-expect-error — intentionally malformed
    const result = evaluateRunAgainstSpec(trace, { id: "x" }, makeRun());
    expect(result.status).toBe("harness_error");
  });

  test("trace with non-array events does not throw", () => {
    // Trace-shaped object whose `events` field is null instead of an array.
    const bad = { schemaVersion: 1, taskId: "x", arm: "akm", seed: 1, events: null };
    // The directive covers the call, not the declaration above: a plain object literal
    // cannot raise a type error, so the directive would be unused there.
    // NOTE(review): assumes the evaluator's trace parameter rejects `events: null`; confirm.
    // @ts-expect-error — intentional misuse
    const result = evaluateRunAgainstSpec(bad, makeSpec(), makeRun());
    expect(result.status).toBe("harness_error");
  });

  test("evaluator does not mutate inputs", () => {
    const trace = makeTrace([ev("agent_started"), ev("akm_search"), ev("first_workspace_write"), ev("agent_finished")]);
    const traceJson = JSON.stringify(trace);
    const spec = makeSpec();
    const specJson = JSON.stringify(spec);
    const task = { goldRef: "skill:deploy", flags: { foo: true } };
    const taskJson = JSON.stringify(task);
    evaluateRunAgainstSpec(trace, spec, makeRun(), task);
    // Snapshots taken before the call must match the serialized inputs afterwards.
    expect(JSON.stringify(trace)).toBe(traceJson);
    expect(JSON.stringify(spec)).toBe(specJson);
    expect(JSON.stringify(task)).toBe(taskJson);
  });
});
324
+ /* ── required_if ──────────────────────────────────────────────────────────── */
325
describe("evaluateRunAgainstSpec — required_if guards", () => {
  // Spec whose middle step (akm_show) is gated on the `search_has_relevant_result` flag.
  const buildGatedSpec = () =>
    makeSpec({
      required_sequence: [
        { event: "agent_started" },
        { event: "akm_show", required_if: "search_has_relevant_result" },
        { event: "agent_finished" },
      ],
      forbidden: [],
    });

  // Trace that starts and finishes without ever emitting akm_show.
  const buildShowlessTrace = () => makeTrace([ev("agent_started"), ev("agent_finished")]);

  test("step is skipped when required_if flag is false/missing", () => {
    const gatedSpec = buildGatedSpec();
    const showlessTrace = buildShowlessTrace();
    const outcome = evaluateRunAgainstSpec(showlessTrace, gatedSpec, makeRun());
    expect(outcome.status).toBe("pass");
    // requiredTotal should NOT include the gated step.
    expect(outcome.requiredTotal).toBe(2);
  });

  test("step is enforced when required_if flag is true", () => {
    const gatedSpec = buildGatedSpec();
    const showlessTrace = buildShowlessTrace();
    const task = { flags: { search_has_relevant_result: true } };
    const outcome = evaluateRunAgainstSpec(showlessTrace, gatedSpec, makeRun(), task);
    expect(outcome.status).not.toBe("pass");
    const missingReported = outcome.violations.some(({ code }) => code === "missing_required_event");
    expect(missingReported).toBe(true);
  });
});
358
+ /* ── min_count ────────────────────────────────────────────────────────────── */
359
describe("evaluateRunAgainstSpec — min_count", () => {
  test("requires N matching events with the right polarity", () => {
    // The middle step needs at least two negative akm_feedback events.
    const twoNegativesSpec = makeSpec({
      required_sequence: [
        { event: "agent_started" },
        { event: "akm_feedback", polarity: "negative", min_count: 2 },
        { event: "agent_finished" },
      ],
      forbidden: [],
    });

    // One feedback event: below min_count, so the step is reported missing.
    const singleFeedbackTrace = makeTrace([
      ev("agent_started"),
      ev("akm_feedback", { args: ["-1", "skill:foo"] }),
      ev("agent_finished"),
    ]);
    const singleRun = makeRun({ outcome: "fail", verifierFailed: true });
    const singleOutcome = evaluateRunAgainstSpec(singleFeedbackTrace, twoNegativesSpec, singleRun);
    const missingReported = singleOutcome.violations.some(({ code }) => code === "missing_required_event");
    expect(missingReported).toBe(true);

    // Two feedback events: min_count is met and the spec passes.
    const doubleFeedbackTrace = makeTrace([
      ev("agent_started"),
      ev("akm_feedback", { args: ["-1", "skill:foo"] }),
      ev("akm_feedback", { args: ["-1", "skill:bar"] }),
      ev("agent_finished"),
    ]);
    const doubleRun = makeRun({ outcome: "fail", verifierFailed: true });
    const doubleOutcome = evaluateRunAgainstSpec(doubleFeedbackTrace, twoNegativesSpec, doubleRun);
    expect(doubleOutcome.status).toBe("pass");
  });
});
386
+ /* ── violation cap ────────────────────────────────────────────────────────── */
387
describe("evaluateRunAgainstSpec — violation cap", () => {
  test("caps violations at MAX_VIOLATIONS_PER_CHECK", () => {
    // Build ten more required steps than the cap allows violations.
    const stepCount = MAX_VIOLATIONS_PER_CHECK + 10;
    const steps = [];
    while (steps.length < stepCount) {
      // Use a known event name (akm_search) so the spec passes loader-style validation
      // even though required_sequence here is constructed in-memory.
      steps.push({ event: "akm_search" });
    }
    // Each required step will fail with missing_required_event because trace is empty.
    const oversizedSpec = makeSpec({ required_sequence: steps, forbidden: [] });
    const emptyTrace = makeTrace([]);
    const { violations } = evaluateRunAgainstSpec(emptyTrace, oversizedSpec, makeRun());
    expect(violations.length).toBe(MAX_VIOLATIONS_PER_CHECK);
  });
});
402
+ /* ── schemaVersion stability ──────────────────────────────────────────────── */
403
describe("WorkflowCheckResult shape", () => {
  test("envelope always carries schemaVersion: 1", () => {
    const minimalTrace = makeTrace([ev("agent_started"), ev("agent_finished")]);
    const envelope = evaluateRunAgainstSpec(minimalTrace, makeSpec(), makeRun());
    expect(envelope.schemaVersion).toBe(1);

    // Check key envelope fields exist.
    for (const field of ["workflowId", "taskId", "arm"]) {
      expect(typeof envelope[field]).toBe("string");
    }
    expect(typeof envelope.seed).toBe("number");
    expect(Array.isArray(envelope.violations)).toBe(true);

    // The evidence sub-object must carry these typed fields.
    const evidenceShape = {
      matchedEvents: expect.any(Number),
      feedbackRecorded: expect.any(Boolean),
      goldAssetLoaded: expect.any(Boolean),
      traceTruncated: expect.any(Boolean),
    };
    expect(envelope.evidence).toMatchObject(evidenceShape);
  });
});