popeye-cli 2.1.0 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (356)
  1. package/dist/adapters/gemini.d.ts +14 -0
  2. package/dist/adapters/gemini.d.ts.map +1 -1
  3. package/dist/adapters/gemini.js +41 -6
  4. package/dist/adapters/gemini.js.map +1 -1
  5. package/dist/adapters/grok.d.ts +14 -0
  6. package/dist/adapters/grok.d.ts.map +1 -1
  7. package/dist/adapters/grok.js +42 -6
  8. package/dist/adapters/grok.js.map +1 -1
  9. package/dist/adapters/openai.d.ts +10 -0
  10. package/dist/adapters/openai.d.ts.map +1 -1
  11. package/dist/adapters/openai.js +44 -5
  12. package/dist/adapters/openai.js.map +1 -1
  13. package/dist/cli/commands/create.js +1 -1
  14. package/dist/cli/commands/create.js.map +1 -1
  15. package/dist/cli/interactive.d.ts.map +1 -1
  16. package/dist/cli/interactive.js +328 -21
  17. package/dist/cli/interactive.js.map +1 -1
  18. package/dist/generators/all.d.ts.map +1 -1
  19. package/dist/generators/all.js +25 -2
  20. package/dist/generators/all.js.map +1 -1
  21. package/dist/generators/doc-parser.d.ts +21 -6
  22. package/dist/generators/doc-parser.d.ts.map +1 -1
  23. package/dist/generators/doc-parser.js +55 -4
  24. package/dist/generators/doc-parser.js.map +1 -1
  25. package/dist/generators/templates/fullstack.js +1 -1
  26. package/dist/generators/templates/website-components.js +1 -1
  27. package/dist/generators/templates/website-components.js.map +1 -1
  28. package/dist/generators/templates/website-config.d.ts +4 -1
  29. package/dist/generators/templates/website-config.d.ts.map +1 -1
  30. package/dist/generators/templates/website-config.js +17 -11
  31. package/dist/generators/templates/website-config.js.map +1 -1
  32. package/dist/generators/templates/website-conversion.js +1 -1
  33. package/dist/generators/templates/website-conversion.js.map +1 -1
  34. package/dist/generators/templates/website-landing.js +1 -1
  35. package/dist/generators/templates/website-landing.js.map +1 -1
  36. package/dist/generators/templates/website-layout.d.ts +36 -4
  37. package/dist/generators/templates/website-layout.d.ts.map +1 -1
  38. package/dist/generators/templates/website-layout.js +466 -23
  39. package/dist/generators/templates/website-layout.js.map +1 -1
  40. package/dist/generators/templates/website-pricing.js +1 -1
  41. package/dist/generators/templates/website-pricing.js.map +1 -1
  42. package/dist/generators/templates/website-sections.js +1 -1
  43. package/dist/generators/templates/website-sections.js.map +1 -1
  44. package/dist/generators/templates/website-seo.d.ts.map +1 -1
  45. package/dist/generators/templates/website-seo.js +4 -1
  46. package/dist/generators/templates/website-seo.js.map +1 -1
  47. package/dist/generators/templates/website.d.ts +1 -1
  48. package/dist/generators/templates/website.d.ts.map +1 -1
  49. package/dist/generators/templates/website.js +1 -1
  50. package/dist/generators/templates/website.js.map +1 -1
  51. package/dist/generators/website-content-ai.d.ts +52 -0
  52. package/dist/generators/website-content-ai.d.ts.map +1 -0
  53. package/dist/generators/website-content-ai.js +141 -0
  54. package/dist/generators/website-content-ai.js.map +1 -0
  55. package/dist/generators/website-content-scanner.d.ts +1 -1
  56. package/dist/generators/website-content-scanner.d.ts.map +1 -1
  57. package/dist/generators/website-content-scanner.js +98 -1
  58. package/dist/generators/website-content-scanner.js.map +1 -1
  59. package/dist/generators/website-context.d.ts +34 -1
  60. package/dist/generators/website-context.d.ts.map +1 -1
  61. package/dist/generators/website-context.js +131 -9
  62. package/dist/generators/website-context.js.map +1 -1
  63. package/dist/generators/website-debug.d.ts +12 -0
  64. package/dist/generators/website-debug.d.ts.map +1 -1
  65. package/dist/generators/website-debug.js +16 -0
  66. package/dist/generators/website-debug.js.map +1 -1
  67. package/dist/generators/website.d.ts.map +1 -1
  68. package/dist/generators/website.js +26 -4
  69. package/dist/generators/website.js.map +1 -1
  70. package/dist/pipeline/artifact-manager.d.ts.map +1 -1
  71. package/dist/pipeline/artifact-manager.js +3 -0
  72. package/dist/pipeline/artifact-manager.js.map +1 -1
  73. package/dist/pipeline/auto-recovery.d.ts +56 -0
  74. package/dist/pipeline/auto-recovery.d.ts.map +1 -0
  75. package/dist/pipeline/auto-recovery.js +185 -0
  76. package/dist/pipeline/auto-recovery.js.map +1 -0
  77. package/dist/pipeline/change-request.d.ts +39 -0
  78. package/dist/pipeline/change-request.d.ts.map +1 -1
  79. package/dist/pipeline/change-request.js +40 -1
  80. package/dist/pipeline/change-request.js.map +1 -1
  81. package/dist/pipeline/check-runner.d.ts +30 -1
  82. package/dist/pipeline/check-runner.d.ts.map +1 -1
  83. package/dist/pipeline/check-runner.js +122 -1
  84. package/dist/pipeline/check-runner.js.map +1 -1
  85. package/dist/pipeline/command-resolver.d.ts.map +1 -1
  86. package/dist/pipeline/command-resolver.js +33 -2
  87. package/dist/pipeline/command-resolver.js.map +1 -1
  88. package/dist/pipeline/consensus/arbitrator-query.d.ts +22 -0
  89. package/dist/pipeline/consensus/arbitrator-query.d.ts.map +1 -0
  90. package/dist/pipeline/consensus/arbitrator-query.js +70 -0
  91. package/dist/pipeline/consensus/arbitrator-query.js.map +1 -0
  92. package/dist/pipeline/consensus/consensus-runner.d.ts +131 -7
  93. package/dist/pipeline/consensus/consensus-runner.d.ts.map +1 -1
  94. package/dist/pipeline/consensus/consensus-runner.js +809 -35
  95. package/dist/pipeline/consensus/consensus-runner.js.map +1 -1
  96. package/dist/pipeline/cr-lifecycle.d.ts +42 -0
  97. package/dist/pipeline/cr-lifecycle.d.ts.map +1 -0
  98. package/dist/pipeline/cr-lifecycle.js +89 -0
  99. package/dist/pipeline/cr-lifecycle.js.map +1 -0
  100. package/dist/pipeline/gate-engine.d.ts +1 -0
  101. package/dist/pipeline/gate-engine.d.ts.map +1 -1
  102. package/dist/pipeline/gate-engine.js +27 -8
  103. package/dist/pipeline/gate-engine.js.map +1 -1
  104. package/dist/pipeline/migration.d.ts.map +1 -1
  105. package/dist/pipeline/migration.js +3 -26
  106. package/dist/pipeline/migration.js.map +1 -1
  107. package/dist/pipeline/orchestrator.d.ts +1 -1
  108. package/dist/pipeline/orchestrator.d.ts.map +1 -1
  109. package/dist/pipeline/orchestrator.js +311 -16
  110. package/dist/pipeline/orchestrator.js.map +1 -1
  111. package/dist/pipeline/packets/consensus-packet-builder.d.ts +15 -4
  112. package/dist/pipeline/packets/consensus-packet-builder.d.ts.map +1 -1
  113. package/dist/pipeline/packets/consensus-packet-builder.js +29 -17
  114. package/dist/pipeline/packets/consensus-packet-builder.js.map +1 -1
  115. package/dist/pipeline/phases/architecture.d.ts.map +1 -1
  116. package/dist/pipeline/phases/architecture.js +5 -3
  117. package/dist/pipeline/phases/architecture.js.map +1 -1
  118. package/dist/pipeline/phases/audit.d.ts.map +1 -1
  119. package/dist/pipeline/phases/audit.js +5 -3
  120. package/dist/pipeline/phases/audit.js.map +1 -1
  121. package/dist/pipeline/phases/consensus-architecture.d.ts.map +1 -1
  122. package/dist/pipeline/phases/consensus-architecture.js +10 -1
  123. package/dist/pipeline/phases/consensus-architecture.js.map +1 -1
  124. package/dist/pipeline/phases/consensus-master-plan.d.ts.map +1 -1
  125. package/dist/pipeline/phases/consensus-master-plan.js +10 -3
  126. package/dist/pipeline/phases/consensus-master-plan.js.map +1 -1
  127. package/dist/pipeline/phases/consensus-role-plans.d.ts.map +1 -1
  128. package/dist/pipeline/phases/consensus-role-plans.js +10 -1
  129. package/dist/pipeline/phases/consensus-role-plans.js.map +1 -1
  130. package/dist/pipeline/phases/done.d.ts.map +1 -1
  131. package/dist/pipeline/phases/done.js +9 -4
  132. package/dist/pipeline/phases/done.js.map +1 -1
  133. package/dist/pipeline/phases/intake.d.ts +1 -0
  134. package/dist/pipeline/phases/intake.d.ts.map +1 -1
  135. package/dist/pipeline/phases/intake.js +56 -13
  136. package/dist/pipeline/phases/intake.js.map +1 -1
  137. package/dist/pipeline/phases/phase-context.d.ts +2 -0
  138. package/dist/pipeline/phases/phase-context.d.ts.map +1 -1
  139. package/dist/pipeline/phases/phase-context.js +3 -1
  140. package/dist/pipeline/phases/phase-context.js.map +1 -1
  141. package/dist/pipeline/phases/production-gate.d.ts.map +1 -1
  142. package/dist/pipeline/phases/production-gate.js +28 -3
  143. package/dist/pipeline/phases/production-gate.js.map +1 -1
  144. package/dist/pipeline/phases/qa-validation.d.ts.map +1 -1
  145. package/dist/pipeline/phases/qa-validation.js +38 -5
  146. package/dist/pipeline/phases/qa-validation.js.map +1 -1
  147. package/dist/pipeline/phases/recovery-loop.d.ts +2 -0
  148. package/dist/pipeline/phases/recovery-loop.d.ts.map +1 -1
  149. package/dist/pipeline/phases/recovery-loop.js +200 -6
  150. package/dist/pipeline/phases/recovery-loop.js.map +1 -1
  151. package/dist/pipeline/phases/review.d.ts.map +1 -1
  152. package/dist/pipeline/phases/review.js +58 -28
  153. package/dist/pipeline/phases/review.js.map +1 -1
  154. package/dist/pipeline/phases/role-planning.d.ts.map +1 -1
  155. package/dist/pipeline/phases/role-planning.js +20 -5
  156. package/dist/pipeline/phases/role-planning.js.map +1 -1
  157. package/dist/pipeline/phases/stuck.d.ts.map +1 -1
  158. package/dist/pipeline/phases/stuck.js +10 -0
  159. package/dist/pipeline/phases/stuck.js.map +1 -1
  160. package/dist/pipeline/repo-snapshot.d.ts.map +1 -1
  161. package/dist/pipeline/repo-snapshot.js +3 -0
  162. package/dist/pipeline/repo-snapshot.js.map +1 -1
  163. package/dist/pipeline/role-execution-adapter.d.ts +2 -1
  164. package/dist/pipeline/role-execution-adapter.d.ts.map +1 -1
  165. package/dist/pipeline/role-execution-adapter.js +22 -7
  166. package/dist/pipeline/role-execution-adapter.js.map +1 -1
  167. package/dist/pipeline/skill-loader.d.ts +19 -0
  168. package/dist/pipeline/skill-loader.d.ts.map +1 -1
  169. package/dist/pipeline/skill-loader.js +22 -0
  170. package/dist/pipeline/skill-loader.js.map +1 -1
  171. package/dist/pipeline/skills/constitution-generator.d.ts +51 -0
  172. package/dist/pipeline/skills/constitution-generator.d.ts.map +1 -0
  173. package/dist/pipeline/skills/constitution-generator.js +210 -0
  174. package/dist/pipeline/skills/constitution-generator.js.map +1 -0
  175. package/dist/pipeline/skills/coverage-gate.d.ts +44 -0
  176. package/dist/pipeline/skills/coverage-gate.d.ts.map +1 -0
  177. package/dist/pipeline/skills/coverage-gate.js +143 -0
  178. package/dist/pipeline/skills/coverage-gate.js.map +1 -0
  179. package/dist/pipeline/skills/generator.d.ts +65 -0
  180. package/dist/pipeline/skills/generator.d.ts.map +1 -0
  181. package/dist/pipeline/skills/generator.js +221 -0
  182. package/dist/pipeline/skills/generator.js.map +1 -0
  183. package/dist/pipeline/skills/role-map.d.ts +38 -0
  184. package/dist/pipeline/skills/role-map.d.ts.map +1 -0
  185. package/dist/pipeline/skills/role-map.js +234 -0
  186. package/dist/pipeline/skills/role-map.js.map +1 -0
  187. package/dist/pipeline/skills/types.d.ts +47 -0
  188. package/dist/pipeline/skills/types.d.ts.map +1 -0
  189. package/dist/pipeline/skills/types.js +5 -0
  190. package/dist/pipeline/skills/types.js.map +1 -0
  191. package/dist/pipeline/skills/usage-registry.d.ts +48 -0
  192. package/dist/pipeline/skills/usage-registry.d.ts.map +1 -0
  193. package/dist/pipeline/skills/usage-registry.js +55 -0
  194. package/dist/pipeline/skills/usage-registry.js.map +1 -0
  195. package/dist/pipeline/strategy-context.d.ts +20 -0
  196. package/dist/pipeline/strategy-context.d.ts.map +1 -0
  197. package/dist/pipeline/strategy-context.js +55 -0
  198. package/dist/pipeline/strategy-context.js.map +1 -0
  199. package/dist/pipeline/type-defs/artifacts.d.ts +30 -5
  200. package/dist/pipeline/type-defs/artifacts.d.ts.map +1 -1
  201. package/dist/pipeline/type-defs/artifacts.js +5 -0
  202. package/dist/pipeline/type-defs/artifacts.js.map +1 -1
  203. package/dist/pipeline/type-defs/audit.d.ts +28 -13
  204. package/dist/pipeline/type-defs/audit.d.ts.map +1 -1
  205. package/dist/pipeline/type-defs/checks.d.ts +19 -8
  206. package/dist/pipeline/type-defs/checks.d.ts.map +1 -1
  207. package/dist/pipeline/type-defs/checks.js +4 -0
  208. package/dist/pipeline/type-defs/checks.js.map +1 -1
  209. package/dist/pipeline/type-defs/packets.d.ts +119 -18
  210. package/dist/pipeline/type-defs/packets.d.ts.map +1 -1
  211. package/dist/pipeline/type-defs/packets.js +17 -1
  212. package/dist/pipeline/type-defs/packets.js.map +1 -1
  213. package/dist/pipeline/type-defs/state.d.ts +165 -16
  214. package/dist/pipeline/type-defs/state.d.ts.map +1 -1
  215. package/dist/pipeline/type-defs/state.js +26 -1
  216. package/dist/pipeline/type-defs/state.js.map +1 -1
  217. package/dist/shared/text-utils.d.ts +23 -0
  218. package/dist/shared/text-utils.d.ts.map +1 -0
  219. package/dist/shared/text-utils.js +66 -0
  220. package/dist/shared/text-utils.js.map +1 -0
  221. package/dist/shared/website-strategy-format.d.ts +18 -0
  222. package/dist/shared/website-strategy-format.d.ts.map +1 -0
  223. package/dist/shared/website-strategy-format.js +47 -0
  224. package/dist/shared/website-strategy-format.js.map +1 -0
  225. package/dist/state/index.d.ts +2 -0
  226. package/dist/state/index.d.ts.map +1 -1
  227. package/dist/state/index.js +57 -8
  228. package/dist/state/index.js.map +1 -1
  229. package/dist/types/consensus.d.ts +1 -0
  230. package/dist/types/consensus.d.ts.map +1 -1
  231. package/dist/types/consensus.js.map +1 -1
  232. package/dist/types/website-strategy.d.ts +1 -1
  233. package/dist/types/workflow.d.ts +447 -0
  234. package/dist/types/workflow.d.ts.map +1 -1
  235. package/dist/types/workflow.js +3 -0
  236. package/dist/types/workflow.js.map +1 -1
  237. package/dist/upgrade/handlers.d.ts.map +1 -1
  238. package/dist/upgrade/handlers.js +6 -3
  239. package/dist/upgrade/handlers.js.map +1 -1
  240. package/dist/workflow/consensus.d.ts.map +1 -1
  241. package/dist/workflow/consensus.js +1 -0
  242. package/dist/workflow/consensus.js.map +1 -1
  243. package/dist/workflow/website-strategy.d.ts.map +1 -1
  244. package/dist/workflow/website-strategy.js +2 -29
  245. package/dist/workflow/website-strategy.js.map +1 -1
  246. package/dist/workflow/website-updater.d.ts.map +1 -1
  247. package/dist/workflow/website-updater.js +3 -2
  248. package/dist/workflow/website-updater.js.map +1 -1
  249. package/package.json +1 -1
  250. package/src/adapters/gemini.ts +51 -6
  251. package/src/adapters/grok.ts +51 -6
  252. package/src/adapters/openai.ts +53 -5
  253. package/src/cli/commands/create.ts +1 -1
  254. package/src/cli/interactive.ts +337 -20
  255. package/src/generators/all.ts +25 -2
  256. package/src/generators/doc-parser.ts +75 -15
  257. package/src/generators/templates/fullstack.ts +1 -1
  258. package/src/generators/templates/website-components.ts +1 -1
  259. package/src/generators/templates/website-config.ts +23 -11
  260. package/src/generators/templates/website-conversion.ts +1 -1
  261. package/src/generators/templates/website-landing.ts +1 -1
  262. package/src/generators/templates/website-layout.ts +491 -23
  263. package/src/generators/templates/website-pricing.ts +1 -1
  264. package/src/generators/templates/website-sections.ts +1 -1
  265. package/src/generators/templates/website-seo.ts +4 -1
  266. package/src/generators/templates/website.ts +3 -0
  267. package/src/generators/website-content-ai.ts +186 -0
  268. package/src/generators/website-content-scanner.ts +113 -1
  269. package/src/generators/website-context.ts +151 -12
  270. package/src/generators/website-debug.ts +26 -0
  271. package/src/generators/website.ts +28 -3
  272. package/src/pipeline/artifact-manager.ts +3 -0
  273. package/src/pipeline/auto-recovery.ts +283 -0
  274. package/src/pipeline/change-request.ts +63 -1
  275. package/src/pipeline/check-runner.ts +141 -2
  276. package/src/pipeline/command-resolver.ts +34 -2
  277. package/src/pipeline/consensus/arbitrator-query.ts +101 -0
  278. package/src/pipeline/consensus/consensus-runner.ts +1099 -42
  279. package/src/pipeline/cr-lifecycle.ts +103 -0
  280. package/src/pipeline/gate-engine.ts +36 -8
  281. package/src/pipeline/migration.ts +5 -30
  282. package/src/pipeline/orchestrator.ts +367 -16
  283. package/src/pipeline/packets/consensus-packet-builder.ts +44 -18
  284. package/src/pipeline/phases/architecture.ts +6 -3
  285. package/src/pipeline/phases/audit.ts +6 -3
  286. package/src/pipeline/phases/consensus-architecture.ts +10 -1
  287. package/src/pipeline/phases/consensus-master-plan.ts +10 -3
  288. package/src/pipeline/phases/consensus-role-plans.ts +10 -1
  289. package/src/pipeline/phases/done.ts +15 -4
  290. package/src/pipeline/phases/intake.ts +67 -14
  291. package/src/pipeline/phases/phase-context.ts +6 -1
  292. package/src/pipeline/phases/production-gate.ts +41 -3
  293. package/src/pipeline/phases/qa-validation.ts +51 -5
  294. package/src/pipeline/phases/recovery-loop.ts +229 -7
  295. package/src/pipeline/phases/review.ts +73 -30
  296. package/src/pipeline/phases/role-planning.ts +23 -5
  297. package/src/pipeline/phases/stuck.ts +10 -0
  298. package/src/pipeline/repo-snapshot.ts +3 -0
  299. package/src/pipeline/role-execution-adapter.ts +30 -4
  300. package/src/pipeline/skill-loader.ts +33 -0
  301. package/src/pipeline/skills/constitution-generator.ts +236 -0
  302. package/src/pipeline/skills/coverage-gate.ts +199 -0
  303. package/src/pipeline/skills/generator.ts +287 -0
  304. package/src/pipeline/skills/role-map.ts +248 -0
  305. package/src/pipeline/skills/types.ts +53 -0
  306. package/src/pipeline/skills/usage-registry.ts +87 -0
  307. package/src/pipeline/strategy-context.ts +60 -0
  308. package/src/pipeline/type-defs/artifacts.ts +5 -0
  309. package/src/pipeline/type-defs/checks.ts +4 -0
  310. package/src/pipeline/type-defs/packets.ts +18 -1
  311. package/src/pipeline/type-defs/state.ts +26 -1
  312. package/src/shared/text-utils.ts +70 -0
  313. package/src/shared/website-strategy-format.ts +56 -0
  314. package/src/state/index.ts +60 -8
  315. package/src/types/consensus.ts +1 -0
  316. package/src/types/workflow.ts +6 -0
  317. package/src/upgrade/handlers.ts +9 -3
  318. package/src/workflow/consensus.ts +1 -0
  319. package/src/workflow/website-strategy.ts +2 -36
  320. package/src/workflow/website-updater.ts +4 -2
  321. package/tests/adapters/gemini.test.ts +165 -0
  322. package/tests/adapters/grok.test.ts +137 -0
  323. package/tests/adapters/openai.test.ts +128 -0
  324. package/tests/generators/doc-parser.test.ts +88 -9
  325. package/tests/generators/quality-gate.test.ts +19 -3
  326. package/tests/generators/website-components.test.ts +34 -0
  327. package/tests/generators/website-content-ai.test.ts +308 -0
  328. package/tests/generators/website-content-scanner.test.ts +86 -0
  329. package/tests/generators/website-context.test.ts +3 -2
  330. package/tests/integration/smokestack-scaffold.test.ts +385 -0
  331. package/tests/pipeline/auto-recovery.test.ts +337 -0
  332. package/tests/pipeline/change-request.test.ts +70 -0
  333. package/tests/pipeline/command-resolver.test.ts +42 -0
  334. package/tests/pipeline/consensus/arbitrator-query.test.ts +107 -0
  335. package/tests/pipeline/consensus-runner.test.ts +1333 -10
  336. package/tests/pipeline/consensus-scoring.test.ts +602 -18
  337. package/tests/pipeline/gate-engine.test.ts +34 -0
  338. package/tests/pipeline/install-check.test.ts +261 -0
  339. package/tests/pipeline/migration.test.ts +4 -3
  340. package/tests/pipeline/orchestrator.test.ts +1506 -15
  341. package/tests/pipeline/packets/builders.test.ts +29 -6
  342. package/tests/pipeline/phases/role-planning.strategy.test.ts +204 -0
  343. package/tests/pipeline/pipeline-persistence.test.ts +230 -0
  344. package/tests/pipeline/recovery-loop-guidance.test.ts +280 -0
  345. package/tests/pipeline/role-execution-adapter.test.ts +88 -0
  346. package/tests/pipeline/skills/constitution-generator.test.ts +201 -0
  347. package/tests/pipeline/skills/coverage-gate.test.ts +370 -0
  348. package/tests/pipeline/skills/generator.test.ts +213 -0
  349. package/tests/pipeline/skills/role-map.test.ts +198 -0
  350. package/tests/pipeline/skills/usage-registry.test.ts +114 -0
  351. package/tests/pipeline/strategy-context.test.ts +148 -0
  352. package/tests/shared/text-utils.test.ts +155 -0
  353. package/tests/state/progress-analysis.test.ts +375 -0
  354. package/tests/upgrade/handlers.test.ts +33 -2
  355. package/tests/workflow/consensus.test.ts +6 -0
  356. package/tsconfig.json +1 -1
@@ -1,11 +1,25 @@
1
1
  /**
2
2
  * Consensus Runner tests — vote aggregation, packet construction,
3
- * prompt building. (LLM calls are not tested here.)
3
+ * prompt building, normalization wiring, arbitration triggers.
4
+ * (LLM calls are not tested here.)
4
5
  */
5
6
 
6
- import { describe, it, expect } from 'vitest';
7
+ import { describe, it, expect, vi } from 'vitest';
8
+ import * as fs from 'node:fs';
9
+ import * as path from 'node:path';
10
+ import * as os from 'node:os';
7
11
  import {
8
12
  buildReviewPrompt,
13
+ mapVote,
14
+ hasVoteDisagreement,
15
+ normalizeVoteBlockers,
16
+ DEFAULT_CONDITIONAL_FLOOR,
17
+ ConsensusRunner,
18
+ parseRawReviewResponse,
19
+ parseArbitratorResponse,
20
+ loadPlanContent,
21
+ correctConfidenceContradiction,
22
+ getArbitrationTrigger,
9
23
  } from '../../src/pipeline/consensus/consensus-runner.js';
10
24
  import {
11
25
  buildConsensusPacket,
@@ -105,7 +119,7 @@ describe('ConsensusRunner', () => {
105
119
  expect(prompt).toContain('Auth strategy?');
106
120
  });
107
121
 
108
- it('should include review instructions', () => {
122
+ it('should include review instructions with scoring guide', () => {
109
123
  const packet = makePlanPacket();
110
124
  const prompt = buildReviewPrompt(packet);
111
125
 
@@ -113,6 +127,92 @@ describe('ConsensusRunner', () => {
113
127
  expect(prompt).toContain('REJECT');
114
128
  expect(prompt).toContain('CONDITIONAL');
115
129
  expect(prompt).toContain('Completeness');
130
+ expect(prompt).toContain('Scoring Guide');
131
+ expect(prompt).toContain('[BLOCKER]');
132
+ expect(prompt).toContain('[REQUIRED]');
133
+ expect(prompt).toContain('[SUGGESTION]');
134
+ });
135
+
136
+ it('should include revision notice for version > 1 (v2.4.2)', () => {
137
+ const packet = makePlanPacket({
138
+ metadata: {
139
+ packet_id: 'plan-2',
140
+ timestamp: new Date().toISOString(),
141
+ phase: 'INTAKE',
142
+ submitted_by: 'DISPATCHER',
143
+ version: 2,
144
+ },
145
+ });
146
+ const prompt = buildReviewPrompt(packet);
147
+
148
+ expect(prompt).toContain('Revision Notice');
149
+ expect(prompt).toContain('prior issues');
150
+ });
151
+
152
+ it('should NOT include revision notice for version 1', () => {
153
+ const packet = makePlanPacket();
154
+ const prompt = buildReviewPrompt(packet);
155
+
156
+ expect(prompt).not.toContain('Revision Notice');
157
+ });
158
+ });
159
+
160
+ describe('mapVote', () => {
161
+ it('should APPROVE when confidence meets threshold', () => {
162
+ expect(mapVote(0.96, 0.95)).toBe('APPROVE');
163
+ expect(mapVote(0.95, 0.95)).toBe('APPROVE');
164
+ expect(mapVote(1.0, 0.95)).toBe('APPROVE');
165
+ });
166
+
167
+ it('should CONDITIONAL for floor to threshold', () => {
168
+ expect(mapVote(0.94, 0.95)).toBe('CONDITIONAL');
169
+ expect(mapVote(0.90, 0.95)).toBe('CONDITIONAL');
170
+ expect(mapVote(0.85, 0.95)).toBe('CONDITIONAL');
171
+ expect(mapVote(0.80, 0.95)).toBe('CONDITIONAL');
172
+ });
173
+
174
+ it('should REJECT below floor', () => {
175
+ expect(mapVote(0.79, 0.95)).toBe('REJECT');
176
+ expect(mapVote(0.50, 0.95)).toBe('REJECT');
177
+ expect(mapVote(0.0, 0.95)).toBe('REJECT');
178
+ });
179
+
180
+ it('should respect custom thresholds', () => {
181
+ expect(mapVote(0.90, 0.90)).toBe('APPROVE');
182
+ expect(mapVote(0.85, 0.90)).toBe('CONDITIONAL');
183
+ });
184
+
185
+ it('should clamp out-of-range inputs', () => {
186
+ expect(mapVote(1.5, 0.95)).toBe('APPROVE');
187
+ expect(mapVote(-0.1, 0.95)).toBe('REJECT');
188
+ });
189
+
190
+ it('should handle conditionalFloor > threshold by clamping floor', () => {
191
+ expect(mapVote(0.90, 0.85, 0.95)).toBe('APPROVE');
192
+ });
193
+
194
+ it('should export DEFAULT_CONDITIONAL_FLOOR as 0.80', () => {
195
+ expect(DEFAULT_CONDITIONAL_FLOOR).toBe(0.80);
196
+ });
197
+ });
198
+
199
+ describe('hasVoteDisagreement', () => {
200
+ it('should return false for single vote', () => {
201
+ expect(hasVoteDisagreement([makeVote('r1', 'APPROVE')])).toBe(false);
202
+ });
203
+
204
+ it('should return false for unanimous votes', () => {
205
+ expect(hasVoteDisagreement([
206
+ makeVote('r1', 'APPROVE'),
207
+ makeVote('r2', 'APPROVE'),
208
+ ])).toBe(false);
209
+ });
210
+
211
+ it('should return true for mixed votes', () => {
212
+ expect(hasVoteDisagreement([
213
+ makeVote('r1', 'APPROVE'),
214
+ makeVote('r2', 'REJECT'),
215
+ ])).toBe(true);
116
216
  });
117
217
  });
118
218
 
@@ -120,7 +220,10 @@ describe('ConsensusRunner', () => {
120
220
  it('should approve when all reviewers approve with sufficient quorum', () => {
121
221
  const packet = buildConsensusPacket({
122
222
  planPacketRef: makeRef(),
123
- votes: [makeVote('r1', 'APPROVE'), makeVote('r2', 'APPROVE')],
223
+ votes: [
224
+ makeVote('r1', 'APPROVE', 0.96),
225
+ makeVote('r2', 'APPROVE', 0.97),
226
+ ],
124
227
  rules: { threshold: 0.95, quorum: 2, min_reviewers: 2 },
125
228
  });
126
229
 
@@ -142,9 +245,9 @@ describe('ConsensusRunner', () => {
142
245
 
143
246
  it('should handle multi-provider votes', () => {
144
247
  const votes: ReviewerVote[] = [
145
- { ...makeVote('r1', 'APPROVE'), provider: 'openai', model: 'gpt-4o' },
146
- { ...makeVote('r2', 'APPROVE'), provider: 'gemini', model: 'gemini-2.0-flash' },
147
- { ...makeVote('r3', 'APPROVE'), provider: 'grok', model: 'grok-3' },
248
+ { ...makeVote('r1', 'APPROVE', 0.96), provider: 'openai', model: 'gpt-4o' },
249
+ { ...makeVote('r2', 'APPROVE', 0.97), provider: 'gemini', model: 'gemini-2.0-flash' },
250
+ { ...makeVote('r3', 'APPROVE', 0.98), provider: 'grok', model: 'grok-3' },
148
251
  ];
149
252
 
150
253
  const packet = buildConsensusPacket({
@@ -164,7 +267,6 @@ describe('ConsensusRunner', () => {
164
267
  rules: { threshold: 0.95, quorum: 2, min_reviewers: 2 },
165
268
  });
166
269
 
167
- // CONDITIONAL is not APPROVE, so score = 0.5
168
270
  expect(packet.consensus_result.score).toBe(0.5);
169
271
  expect(packet.final_status).toBe('REJECTED');
170
272
  });
@@ -183,11 +285,10 @@ describe('ConsensusRunner', () => {
183
285
  it('should reject when quorum not met', () => {
184
286
  const packet = buildConsensusPacket({
185
287
  planPacketRef: makeRef(),
186
- votes: [makeVote('r1', 'APPROVE')],
288
+ votes: [makeVote('r1', 'APPROVE', 0.96)],
187
289
  rules: { threshold: 0.5, quorum: 2, min_reviewers: 2 },
188
290
  });
189
291
 
190
- // 1 approver, score = 1.0, but quorum = 2, only 1 voter
191
292
  expect(packet.consensus_result.approved).toBe(false);
192
293
  });
193
294
 
@@ -203,4 +304,1226 @@ describe('ConsensusRunner', () => {
203
304
  expect(packet.metadata.plan_packet_id).toBe(planRef.artifact_id);
204
305
  });
205
306
  });
307
+
308
+ describe('arbitration triggers', () => {
309
+ it('triggers arbitration on vote disagreement when enableArbitration=true', () => {
310
+ // We test the shouldArbitrate logic indirectly through normalizedVotes + hasVoteDisagreement
311
+ const votes = [
312
+ makeVote('r1', 'APPROVE', 0.96),
313
+ makeVote('r2', 'REJECT', 0.5),
314
+ ];
315
+ expect(hasVoteDisagreement(votes)).toBe(true);
316
+
317
+ // The actual arbitration call requires LLM, so we verify the condition only
318
+ const runner = new ConsensusRunner({
319
+ mode: 'independent',
320
+ minReviewers: 2,
321
+ threshold: 0.95,
322
+ quorum: 2,
323
+ projectDir: '/tmp/test',
324
+ enableArbitration: true,
325
+ arbitratorProvider: { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.2 },
326
+ });
327
+ // Smoke check only: the runner can be constructed with this arbitration config
328
+ expect(runner).toBeDefined();
329
+ });
330
+
331
+ it('does not trigger when enableArbitration=false', () => {
332
+ const runner = new ConsensusRunner({
333
+ mode: 'independent',
334
+ minReviewers: 2,
335
+ threshold: 0.95,
336
+ quorum: 2,
337
+ projectDir: '/tmp/test',
338
+ enableArbitration: false,
339
+ });
340
+ // No arbitratorProvider means no arbitration call
341
+ expect(runner).toBeDefined();
342
+ });
343
+
344
+ it('triggers on "death by conditional" (all CONDITIONAL, avg conf >= 0.94, required_changes <= 3)', () => {
345
+ const votes: ReviewerVote[] = [
346
+ {
347
+ ...makeVote('r1', 'CONDITIONAL', 0.94),
348
+ blocking_issues: [],
349
+ required_changes: ['Add error handling'],
350
+ },
351
+ {
352
+ ...makeVote('r2', 'CONDITIONAL', 0.95),
353
+ blocking_issues: [],
354
+ required_changes: ['Add input validation'],
355
+ },
356
+ ];
357
+
358
+ const allConditional = votes.every(v => v.vote === 'CONDITIONAL');
359
+ const avgConf = votes.reduce((s, v) => s + v.confidence, 0) / votes.length;
360
+ const totalRequired = votes.reduce((s, v) => s + (v.required_changes?.length ?? 0), 0);
361
+
362
+ expect(allConditional).toBe(true);
363
+ expect(avgConf).toBeGreaterThanOrEqual(0.94);
364
+ expect(totalRequired).toBeLessThanOrEqual(3);
365
+ });
366
+
367
+ it('does NOT trigger "death by conditional" when required_changes > 3', () => {
368
+ const votes: ReviewerVote[] = [
369
+ {
370
+ ...makeVote('r1', 'CONDITIONAL', 0.94),
371
+ blocking_issues: [],
372
+ required_changes: ['Fix A', 'Fix B'],
373
+ },
374
+ {
375
+ ...makeVote('r2', 'CONDITIONAL', 0.95),
376
+ blocking_issues: [],
377
+ required_changes: ['Fix C', 'Fix D'],
378
+ },
379
+ ];
380
+
381
+ const totalRequired = votes.reduce((s, v) => s + (v.required_changes?.length ?? 0), 0);
382
+ expect(totalRequired).toBe(4);
383
+ expect(totalRequired).toBeGreaterThan(3);
384
+ });
385
+
386
+ it('v2.4.2: caps at 1 attempt per phase+version (version-keyed tracking)', () => {
387
+ const runner = new ConsensusRunner({
388
+ mode: 'independent',
389
+ minReviewers: 2,
390
+ threshold: 0.95,
391
+ quorum: 2,
392
+ projectDir: '/tmp/test',
393
+ enableArbitration: true,
394
+ arbitratorProvider: { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.2 },
395
+ });
396
+
397
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
398
+ const attempted = (runner as any).arbitrationAttempted as Set<string>;
399
+ attempted.add('CONSENSUS_MASTER_PLAN@v1');
400
+ expect(attempted.has('CONSENSUS_MASTER_PLAN@v1')).toBe(true);
401
+ // Same phase with new version is NOT blocked
402
+ expect(attempted.has('CONSENSUS_MASTER_PLAN@v2')).toBe(false);
403
+ // Different phase is NOT blocked
404
+ expect(attempted.has('CONSENSUS_ARCHITECTURE@v1')).toBe(false);
405
+ });
406
+
407
+ it('ARBITRATED packet includes arbitrator_result', () => {
408
+ const packet = buildConsensusPacket({
409
+ planPacketRef: makeRef(),
410
+ votes: [makeVote('r1', 'CONDITIONAL', 0.88)],
411
+ rules: { threshold: 0.95, quorum: 1, min_reviewers: 1 },
412
+ arbitratorResult: {
413
+ decision: 'Plan is execution-ready with minor amendments',
414
+ merged_patch: 'Add error handling to endpoint /api/users',
415
+ },
416
+ });
417
+
418
+ expect(packet.final_status).toBe('ARBITRATED');
419
+ expect(packet.arbitrator_result).toBeDefined();
420
+ expect(packet.arbitrator_result?.decision).toContain('execution-ready');
421
+ expect(packet.arbitrator_result?.merged_patch).toContain('error handling');
422
+ });
423
+ });
424
+
425
+ describe('parseRawReviewResponse — JSON parsing', () => {
426
+ it('parses JSON response with APPROVE and 0.96 confidence', () => {
427
+ const raw = JSON.stringify({
428
+ vote: 'APPROVE',
429
+ confidence: 0.96,
430
+ blocking_issues: [],
431
+ required_changes: [],
432
+ suggestions: ['Consider adding rate limiting'],
433
+ analysis: 'Plan looks solid.',
434
+ });
435
+ const result = parseRawReviewResponse(raw);
436
+
437
+ expect(result.confidence).toBe(0.96);
438
+ expect(result.modelVote).toBe('APPROVE');
439
+ expect(result.blockingIssues).toEqual([]);
440
+ expect(result.suggestions).toEqual(['Consider adding rate limiting']);
441
+ });
442
+
443
+ it('parses JSON wrapped in markdown code fences', () => {
444
+ const raw = '```json\n' + JSON.stringify({
445
+ vote: 'CONDITIONAL',
446
+ confidence: 0.88,
447
+ blocking_issues: [],
448
+ required_changes: ['[REQUIRED] Add error handling'],
449
+ suggestions: [],
450
+ analysis: 'Needs work.',
451
+ }) + '\n```';
452
+ const result = parseRawReviewResponse(raw);
453
+
454
+ expect(result.confidence).toBe(0.88);
455
+ expect(result.modelVote).toBe('CONDITIONAL');
456
+ expect(result.requiredChanges).toEqual(['[REQUIRED] Add error handling']);
457
+ });
458
+
459
+ it('parses JSON wrapped in code fences without json label', () => {
460
+ const raw = '```\n' + JSON.stringify({
461
+ vote: 'REJECT',
462
+ confidence: 0.45,
463
+ blocking_issues: ['[BLOCKER] Missing auth'],
464
+ required_changes: [],
465
+ suggestions: [],
466
+ }) + '\n```';
467
+ const result = parseRawReviewResponse(raw);
468
+
469
+ expect(result.confidence).toBe(0.45);
470
+ expect(result.modelVote).toBe('REJECT');
471
+ expect(result.blockingIssues).toEqual(['[BLOCKER] Missing auth']);
472
+ });
473
+
474
+ it('returns null for invalid JSON and falls back to regex', () => {
475
+ const raw = 'This is not JSON but APPROVE with CONFIDENCE: 0.92';
476
+ const result = parseRawReviewResponse(raw);
477
+
478
+ // APPROVE + 0.92 is in [0.80, 0.95) range -> corrected to 0.95 by contradiction detector
479
+ expect(result.confidence).toBe(0.95);
480
+ expect(result.modelVote).toBe('APPROVE');
481
+ });
482
+ });
483
+
484
+ describe('parseRawReviewResponse — regex fallback', () => {
485
+ it('parses CONDITIONAL with 0.88 confidence via regex', () => {
486
+ const raw = `VOTE: CONDITIONAL
487
+ CONFIDENCE: 0.88
488
+
489
+ REQUIRED CHANGES:
490
+ - Add input validation
491
+ - Improve error messages
492
+
493
+ SUGGESTIONS:
494
+ - Consider caching`;
495
+
496
+ const result = parseRawReviewResponse(raw);
497
+ expect(result.confidence).toBe(0.88);
498
+ expect(result.modelVote).toBe('CONDITIONAL');
499
+ });
500
+
501
+ it('parses REJECT with blocking issues via regex', () => {
502
+ const raw = `VOTE: REJECT
503
+ CONFIDENCE: 0.55
504
+
505
+ [BLOCKER] Missing authentication
506
+ [BLOCKER] No rate limiting
507
+ [SUGGESTION] Add logging`;
508
+
509
+ const result = parseRawReviewResponse(raw);
510
+ expect(result.confidence).toBe(0.55);
511
+ expect(result.modelVote).toBe('REJECT');
512
+ expect(result.blockingIssues).toContain('Missing authentication');
513
+ expect(result.blockingIssues).toContain('No rate limiting');
514
+ expect(result.suggestions).toContain('Add logging');
515
+ });
516
+
517
+ it('handles CONSENSUS: XX% fallback format', () => {
518
+ const raw = `ANALYSIS: This plan looks good.
519
+ CONSENSUS: 92%`;
520
+
521
+ const result = parseRawReviewResponse(raw);
522
+ expect(result.confidence).toBe(0.92);
523
+ });
524
+
525
+ it('extracts [BLOCKER], [REQUIRED], [SUGGESTION] tagged items', () => {
526
+ const raw = `VOTE: REJECT
527
+ CONFIDENCE: 0.40
528
+ [BLOCKER] SQL injection vulnerability in user input handling
529
+ [REQUIRED] Add input sanitization
530
+ [SUGGESTION] Consider using parameterized queries throughout`;
531
+
532
+ const result = parseRawReviewResponse(raw);
533
+ expect(result.blockingIssues).toContain('SQL injection vulnerability in user input handling');
534
+ expect(result.requiredChanges).toContain('Add input sanitization');
535
+ expect(result.suggestions).toContain('Consider using parameterized queries throughout');
536
+ });
537
+
538
+ it('confidence > 1 is treated as percentage and normalized', () => {
539
+ const raw = 'CONFIDENCE: 92';
540
+ const result = parseRawReviewResponse(raw);
541
+ expect(result.confidence).toBe(0.92);
542
+ });
543
+
544
+ it('defaults to confidence 0 when no parseable score', () => {
545
+ const raw = 'This plan is mediocre.';
546
+ const result = parseRawReviewResponse(raw);
547
+ expect(result.confidence).toBe(0);
548
+ });
549
+
550
+ it('extracts vote even when mixed with other text', () => {
551
+ const raw = 'After careful analysis, I believe the plan deserves APPROVE. Confidence: 0.97';
552
+ const result = parseRawReviewResponse(raw);
553
+ expect(result.modelVote).toBe('APPROVE');
554
+ expect(result.confidence).toBe(0.97);
555
+ });
556
+
557
+ it('extracts confidence from truncated JSON (quotes around key)', () => {
558
+ // Simulates a truncated JSON response where JSON.parse fails but the
559
+ // regex fallback should still extract confidence from "confidence": 0.88
560
+ const raw = '```json\n{"vote": "CONDITIONAL", "confidence": 0.88, "blocking_issues": [], "required_changes": ["[REQUIRED] Add...';
561
+ const result = parseRawReviewResponse(raw);
562
+ expect(result.modelVote).toBe('CONDITIONAL');
563
+ expect(result.confidence).toBe(0.88);
564
+ });
565
+
566
+ it('handles numbered bullet lists in sections', () => {
567
+ const raw = `VOTE: CONDITIONAL
568
+ CONFIDENCE: 0.85
569
+
570
+ REQUIRED CHANGES:
571
+ 1. Add error handling
572
+ 2. Improve validation
573
+ 3. Fix API route naming`;
574
+
575
+ const result = parseRawReviewResponse(raw);
576
+ expect(result.requiredChanges).toHaveLength(3);
577
+ expect(result.requiredChanges).toContain('Add error handling');
578
+ });
579
+ });
580
+
581
+ describe('governance rule: vote derived from confidence', () => {
582
+ it('modelVote APPROVE with confidence 0.93 -> derived vote is CONDITIONAL', () => {
583
+ // Simulate what spawnSingleReviewer does:
584
+ // model says APPROVE but confidence 0.93 < 0.95 threshold
585
+ const confidence = 0.93;
586
+ const threshold = 0.95;
587
+ const derived = mapVote(confidence, threshold);
588
+ const modelVote = 'APPROVE';
589
+
590
+ expect(derived).toBe('CONDITIONAL');
591
+ expect(modelVote).not.toBe(derived);
592
+ });
593
+
594
+ it('modelVote REJECT with confidence 0.96 -> derived vote is APPROVE', () => {
595
+ // model says REJECT but confidence 0.96 >= 0.95 threshold
596
+ const confidence = 0.96;
597
+ const threshold = 0.95;
598
+ const derived = mapVote(confidence, threshold);
599
+ const modelVote = 'REJECT';
600
+
601
+ expect(derived).toBe('APPROVE');
602
+ expect(modelVote).not.toBe(derived);
603
+ });
604
+
605
+ it('reviewer_inconsistency is true when model and derived disagree', () => {
606
+ const confidence = 0.93;
607
+ const threshold = 0.95;
608
+ const derived = mapVote(confidence, threshold);
609
+ const modelVote = 'APPROVE';
610
+ const reviewer_inconsistency = modelVote !== null && modelVote !== derived;
611
+
612
+ expect(reviewer_inconsistency).toBe(true);
613
+ });
614
+
615
+ it('reviewer_inconsistency is false when model and derived agree', () => {
616
+ const confidence = 0.96;
617
+ const threshold = 0.95;
618
+ const derived = mapVote(confidence, threshold);
619
+ const modelVote = 'APPROVE';
620
+ const reviewer_inconsistency = modelVote !== null && modelVote !== derived;
621
+
622
+ expect(reviewer_inconsistency).toBe(false);
623
+ });
624
+
625
+ it('vote derivation always uses mapVote regardless of modelVote', () => {
626
+ // Even if modelVote is null, derived should still work
627
+ const confidence = 0.50;
628
+ const threshold = 0.95;
629
+ const derived = mapVote(confidence, threshold);
630
+
631
+ expect(derived).toBe('REJECT');
632
+ });
633
+ });
634
+
635
+ describe('buildReviewPrompt — JSON response format', () => {
636
+ it('should request JSON response format', () => {
637
+ const packet = makePlanPacket();
638
+ const prompt = buildReviewPrompt(packet);
639
+
640
+ expect(prompt).toContain('Return ONLY a JSON object');
641
+ expect(prompt).toContain('"vote"');
642
+ expect(prompt).toContain('"confidence"');
643
+ expect(prompt).toContain('"blocking_issues"');
644
+ expect(prompt).toContain('"required_changes"');
645
+ expect(prompt).toContain('"suggestions"');
646
+ });
647
+
648
+ it('should include confidence scale guidance', () => {
649
+ const packet = makePlanPacket();
650
+ const prompt = buildReviewPrompt(packet);
651
+
652
+ expect(prompt).toContain('0.95-1.00: APPROVE');
653
+ expect(prompt).toContain('0.80-0.94: CONDITIONAL');
654
+ expect(prompt).toContain('Below 0.80: REJECT');
655
+ });
656
+
657
+ it('should not contain old "Respond with" format', () => {
658
+ const packet = makePlanPacket();
659
+ const prompt = buildReviewPrompt(packet);
660
+
661
+ expect(prompt).not.toContain('Respond with:\n- APPROVE, REJECT, or CONDITIONAL');
662
+ expect(prompt).not.toContain('Confidence score (0-1)');
663
+ });
664
+ });
665
+
666
+ // ─── v2.4.1: Plan Content Loading Tests ──────────────────
667
+
668
+ describe('loadPlanContent', () => {
669
+ function makeTempDir(): string {
670
+ return fs.mkdtempSync(path.join(os.tmpdir(), 'consensus-test-'));
671
+ }
672
+
673
+ it('loads content from valid path', () => {
674
+ const dir = makeTempDir();
675
+ const planPath = 'docs/master_plan.md';
676
+ fs.mkdirSync(path.join(dir, 'docs'), { recursive: true });
677
+ fs.writeFileSync(path.join(dir, planPath), '# My Plan\nDetails here.');
678
+
679
+ const result = loadPlanContent(dir, planPath);
680
+ expect(result.content).toContain('# My Plan');
681
+ expect(result.content).toContain('Details here.');
682
+ expect(result.truncated).toBe(false);
683
+
684
+ fs.rmSync(dir, { recursive: true, force: true });
685
+ });
686
+
687
+ it('returns empty for missing file', () => {
688
+ const dir = makeTempDir();
689
+ const result = loadPlanContent(dir, 'docs/nonexistent.md');
690
+ expect(result.content).toBe('');
691
+ expect(result.truncated).toBe(false);
692
+
693
+ fs.rmSync(dir, { recursive: true, force: true });
694
+ });
695
+
696
+ it('blocks path traversal (../../etc/passwd)', () => {
697
+ const dir = makeTempDir();
698
+ const result = loadPlanContent(dir, '../../etc/passwd');
699
+ expect(result.content).toBe('');
700
+ expect(result.truncated).toBe(false);
701
+
702
+ fs.rmSync(dir, { recursive: true, force: true });
703
+ });
704
+
705
+ it('blocks path traversal (absolute path escape)', () => {
706
+ const dir = makeTempDir();
707
+ // A deeper relative path that resolves outside the project dir must also yield empty content
708
+ const result = loadPlanContent(dir, '../../../tmp/evil.txt');
709
+ expect(result.content).toBe('');
710
+
711
+ fs.rmSync(dir, { recursive: true, force: true });
712
+ });
713
+
714
+ it('truncates content exceeding 50K chars', () => {
715
+ const dir = makeTempDir();
716
+ const planPath = 'plan.md';
717
+ // Create 60K content
718
+ const bigContent = 'A'.repeat(60_000);
719
+ fs.writeFileSync(path.join(dir, planPath), bigContent);
720
+
721
+ const result = loadPlanContent(dir, planPath);
722
+ expect(result.truncated).toBe(true);
723
+ expect(result.content).toContain('[TRUNCATED');
724
+ // Content should be capped around 50K + truncation marker
725
+ expect(result.content.length).toBeLessThan(60_000);
726
+
727
+ fs.rmSync(dir, { recursive: true, force: true });
728
+ });
729
+
730
+ it('returns empty when artifactPath is undefined', () => {
731
+ const result = loadPlanContent('/tmp/test', undefined);
732
+ expect(result.content).toBe('');
733
+ expect(result.truncated).toBe(false);
734
+ });
735
+ });
736
+
737
+ // ─── v2.4.1: Plan Content in Prompt Tests ────────────────
738
+
739
+ describe('buildReviewPrompt — plan content', () => {
740
+ it('includes plan content when provided', () => {
741
+ const packet = makePlanPacket();
742
+ const prompt = buildReviewPrompt(packet, '# Master Plan\nBuild the API.');
743
+
744
+ expect(prompt).toContain('## Plan Content');
745
+ expect(prompt).toContain('# Master Plan');
746
+ expect(prompt).toContain('Build the API.');
747
+ });
748
+
749
+ it('shows warning when plan content is empty', () => {
750
+ const packet = makePlanPacket();
751
+ const prompt = buildReviewPrompt(packet, '');
752
+
753
+ expect(prompt).toContain('## Plan Content');
754
+ expect(prompt).toContain('[WARNING: Plan content could not be loaded');
755
+ });
756
+
757
+ it('shows warning when plan content is undefined (backward compat)', () => {
758
+ const packet = makePlanPacket();
759
+ const prompt = buildReviewPrompt(packet);
760
+
761
+ expect(prompt).toContain('## Plan Content');
762
+ expect(prompt).toContain('[WARNING: Plan content could not be loaded');
763
+ });
764
+ });
765
+
766
+ // ─── v2.4.1: Symmetric Confidence Contradiction Correction ─
767
+
768
+ describe('correctConfidenceContradiction', () => {
769
+ it('corrects REJECT + 0.99 -> min(0.79, 0.01) = 0.01', () => {
770
+ const r = correctConfidenceContradiction('REJECT', 0.99);
771
+ expect(r.confidence).toBeCloseTo(0.01, 5);
772
+ expect(r.wasContradiction).toBe(true);
773
+ expect(r.original).toBeCloseTo(0.99, 5);
774
+ });
775
+
776
+ it('corrects REJECT + 0.85 -> min(0.79, 0.15) = 0.15', () => {
777
+ const r = correctConfidenceContradiction('REJECT', 0.85);
778
+ expect(r.confidence).toBeCloseTo(0.15, 5);
779
+ expect(r.wasContradiction).toBe(true);
780
+ });
781
+
782
+ it('does NOT correct REJECT + 0.50 (already in range)', () => {
783
+ const r = correctConfidenceContradiction('REJECT', 0.50);
784
+ expect(r.confidence).toBeCloseTo(0.50, 5);
785
+ expect(r.wasContradiction).toBe(false);
786
+ });
787
+
788
+ it('corrects CONDITIONAL + 0.98 -> snap to 0.87', () => {
789
+ const r = correctConfidenceContradiction('CONDITIONAL', 0.98);
790
+ expect(r.confidence).toBeCloseTo(0.87, 5);
791
+ expect(r.wasContradiction).toBe(true);
792
+ });
793
+
794
+ it('corrects CONDITIONAL + 0.60 -> snap to 0.87', () => {
795
+ const r = correctConfidenceContradiction('CONDITIONAL', 0.60);
796
+ expect(r.confidence).toBeCloseTo(0.87, 5);
797
+ expect(r.wasContradiction).toBe(true);
798
+ });
799
+
800
+ it('does NOT correct CONDITIONAL + 0.88 (already in range)', () => {
801
+ const r = correctConfidenceContradiction('CONDITIONAL', 0.88);
802
+ expect(r.confidence).toBeCloseTo(0.88, 5);
803
+ expect(r.wasContradiction).toBe(false);
804
+ });
805
+
806
+ it('corrects APPROVE + 0.40 -> max(0.95, 0.60) = 0.95', () => {
807
+ const r = correctConfidenceContradiction('APPROVE', 0.40);
808
+ expect(r.confidence).toBeCloseTo(0.95, 5);
809
+ expect(r.wasContradiction).toBe(true);
810
+ });
811
+
812
+ it('corrects APPROVE + 0.02 -> max(0.95, 0.98) = 0.98', () => {
813
+ const r = correctConfidenceContradiction('APPROVE', 0.02);
814
+ expect(r.confidence).toBeCloseTo(0.98, 5);
815
+ expect(r.wasContradiction).toBe(true);
816
+ });
817
+
818
+ it('corrects APPROVE + 0.88 -> snap to 0.95', () => {
819
+ const r = correctConfidenceContradiction('APPROVE', 0.88);
820
+ expect(r.confidence).toBeCloseTo(0.95, 5);
821
+ expect(r.wasContradiction).toBe(true);
822
+ });
823
+
824
+ it('does NOT correct APPROVE + 0.96 (already in range)', () => {
825
+ const r = correctConfidenceContradiction('APPROVE', 0.96);
826
+ expect(r.confidence).toBeCloseTo(0.96, 5);
827
+ expect(r.wasContradiction).toBe(false);
828
+ });
829
+
830
+ it('does NOT correct when modelVote is null', () => {
831
+ const r = correctConfidenceContradiction(null, 0.99);
832
+ expect(r.confidence).toBeCloseTo(0.99, 5);
833
+ expect(r.wasContradiction).toBe(false);
834
+ });
835
+ });
836
+
837
+ // ─── v2.4.1: Governance Preservation Tests ────────────────
838
+
839
+ describe('governance preservation (correctConfidenceContradiction + mapVote)', () => {
840
+ it('corrected REJECT derives REJECT via mapVote', () => {
841
+ const { confidence } = correctConfidenceContradiction('REJECT', 0.99);
842
+ expect(mapVote(confidence, 0.95)).toBe('REJECT');
843
+ });
844
+
845
+ it('corrected CONDITIONAL derives CONDITIONAL via mapVote', () => {
846
+ const { confidence } = correctConfidenceContradiction('CONDITIONAL', 0.98);
847
+ expect(mapVote(confidence, 0.95)).toBe('CONDITIONAL');
848
+ });
849
+
850
+ it('corrected APPROVE derives APPROVE via mapVote', () => {
851
+ const { confidence } = correctConfidenceContradiction('APPROVE', 0.40);
852
+ expect(mapVote(confidence, 0.95)).toBe('APPROVE');
853
+ });
854
+ });
855
+
856
+ // ─── v2.4.1: Integration (parseRawReviewResponse + correction) ─
857
+
858
+ describe('parseRawReviewResponse — confidence contradiction correction', () => {
859
+ it('corrects JSON response with REJECT + 0.99 and logs warning', () => {
860
+ const raw = JSON.stringify({
861
+ vote: 'REJECT',
862
+ confidence: 0.99,
863
+ blocking_issues: ['[BLOCKER] Missing auth'],
864
+ required_changes: [],
865
+ suggestions: [],
866
+ });
867
+ const result = parseRawReviewResponse(raw);
868
+
869
+ // Confidence should be corrected: min(0.79, 1 - 0.99) = 0.01
870
+ expect(result.confidence).toBeCloseTo(0.01, 5);
871
+ expect(result.modelVote).toBe('REJECT');
872
+ });
873
+
874
+ it('does not alter valid REJECT + 0.45', () => {
875
+ const raw = JSON.stringify({
876
+ vote: 'REJECT',
877
+ confidence: 0.45,
878
+ blocking_issues: ['[BLOCKER] Missing auth'],
879
+ required_changes: [],
880
+ suggestions: [],
881
+ });
882
+ const result = parseRawReviewResponse(raw);
883
+
884
+ expect(result.confidence).toBeCloseTo(0.45, 5);
885
+ });
886
+
887
+ it('corrects JSON response with APPROVE + 0.40', () => {
888
+ const raw = JSON.stringify({
889
+ vote: 'APPROVE',
890
+ confidence: 0.40,
891
+ blocking_issues: [],
892
+ required_changes: [],
893
+ suggestions: [],
894
+ });
895
+ const result = parseRawReviewResponse(raw);
896
+
897
+ // Corrected: max(0.95, 1 - 0.40) = max(0.95, 0.60) = 0.95
898
+ expect(result.confidence).toBeCloseTo(0.95, 5);
899
+ });
900
+ });
901
+
902
+ // ─── v2.4.1: Prompt Wording Tests ────────────────────────
903
+
904
+ describe('buildReviewPrompt — confidence semantics wording', () => {
905
+ it('should state confidence is plan quality not review certainty', () => {
906
+ const packet = makePlanPacket();
907
+ const prompt = buildReviewPrompt(packet);
908
+
909
+ expect(prompt).toContain('PLAN QUALITY');
910
+ expect(prompt).toContain('NOT how certain you are');
911
+ });
912
+
913
+ it('should warn that mismatches will be auto-corrected', () => {
914
+ const packet = makePlanPacket();
915
+ const prompt = buildReviewPrompt(packet);
916
+
917
+ expect(prompt).toContain('auto-corrected');
918
+ expect(prompt).toContain('Mismatched vote+confidence');
919
+ });
920
+
921
+ it('should include valid/invalid response examples', () => {
922
+ const packet = makePlanPacket();
923
+ const prompt = buildReviewPrompt(packet);
924
+
925
+ expect(prompt).toContain('Examples of VALID responses');
926
+ expect(prompt).toContain('Examples of INVALID responses');
927
+ expect(prompt).toContain('REJECT with confidence 0.99');
928
+ expect(prompt).toContain('APPROVE with confidence 0.60');
929
+ });
930
+ });
931
+
932
+ // ─── v2.4.1: Arbitrator Governance Tests ──────────────────
933
+
934
+ describe('arbitrator governance (confidence-only derivation)', () => {
935
+ it('arbitrator approval derived from confidence only, not modelVote', () => {
936
+ const rawLowConf = JSON.stringify({
937
+ vote: 'APPROVE',
938
+ confidence: 0.85,
939
+ blocking_issues: [],
940
+ required_changes: [],
941
+ suggestions: [],
942
+ });
943
+ const parsedLow = parseRawReviewResponse(rawLowConf);
944
+ // APPROVE + 0.85 -> corrected to 0.95, so approved = true
945
+ const approvedCorrected = parsedLow.confidence >= 0.90;
946
+ expect(approvedCorrected).toBe(true);
947
+
948
+ // Now test: REJECT + 0.50 -> no correction, confidence stays 0.50
949
+ const rawReject = JSON.stringify({
950
+ vote: 'REJECT',
951
+ confidence: 0.50,
952
+ blocking_issues: ['[BLOCKER] Bad plan'],
953
+ required_changes: [],
954
+ suggestions: [],
955
+ });
956
+ const parsedReject = parseRawReviewResponse(rawReject);
957
+ const approvedReject = parsedReject.confidence >= 0.90;
958
+ expect(approvedReject).toBe(false);
959
+ });
960
+ });
961
+
962
+ // ─── v2.4.2: Version-keyed Arbitration Tests ──────────────
963
+
964
+ describe('version-keyed arbitration (v2.4.2)', () => {
965
+ it('same phase + new version allows retry', () => {
966
+ const runner = new ConsensusRunner({
967
+ mode: 'independent',
968
+ minReviewers: 2,
969
+ threshold: 0.95,
970
+ quorum: 2,
971
+ projectDir: '/tmp/test',
972
+ enableArbitration: true,
973
+ arbitratorProvider: { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.2 },
974
+ });
975
+
976
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
977
+ const attempted = (runner as any).arbitrationAttempted as Set<string>;
978
+ attempted.add('CONSENSUS_MASTER_PLAN@v1');
979
+
980
+ // v2 should NOT be blocked
981
+ expect(attempted.has('CONSENSUS_MASTER_PLAN@v2')).toBe(false);
982
+ });
983
+
984
+ it('same phase + same version blocks retry', () => {
985
+ const runner = new ConsensusRunner({
986
+ mode: 'independent',
987
+ minReviewers: 2,
988
+ threshold: 0.95,
989
+ quorum: 2,
990
+ projectDir: '/tmp/test',
991
+ enableArbitration: true,
992
+ arbitratorProvider: { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.2 },
993
+ });
994
+
995
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
996
+ const attempted = (runner as any).arbitrationAttempted as Set<string>;
997
+ attempted.add('CONSENSUS_MASTER_PLAN@v1');
998
+
999
+ expect(attempted.has('CONSENSUS_MASTER_PLAN@v1')).toBe(true);
1000
+ });
1001
+ });
1002
+
1003
+ // ─── v2.4.2: Revision Directive Tests ──────────────────────
1004
+
1005
+ describe('revisionDirective in review prompt (v2.4.2)', () => {
1006
+ it('revisionDirective is rendered in review prompt when provided', () => {
1007
+ const packet = makePlanPacket({
1008
+ metadata: {
1009
+ packet_id: 'plan-2',
1010
+ timestamp: new Date().toISOString(),
1011
+ phase: 'CONSENSUS_MASTER_PLAN',
1012
+ submitted_by: 'DISPATCHER',
1013
+ version: 2,
1014
+ },
1015
+ });
1016
+ const directive = 'Fix the authentication flow and add rate limiting';
1017
+ const prompt = buildReviewPrompt(packet, '# Plan', directive);
1018
+
1019
+ expect(prompt).toContain('Prior Feedback (Must Address)');
1020
+ expect(prompt).toContain('Fix the authentication flow');
1021
+ expect(prompt).toContain('Confirm each item above is addressed');
1022
+ });
1023
+
1024
+ it('revisionDirective is NOT rendered when undefined (backward compat)', () => {
1025
+ const packet = makePlanPacket();
1026
+ const prompt = buildReviewPrompt(packet, '# Plan');
1027
+
1028
+ expect(prompt).not.toContain('Prior Feedback (Must Address)');
1029
+ });
1030
+
1031
+ it('revisionDirective is NOT rendered when empty string', () => {
1032
+ const packet = makePlanPacket({
1033
+ metadata: {
1034
+ packet_id: 'plan-2',
1035
+ timestamp: new Date().toISOString(),
1036
+ phase: 'CONSENSUS_MASTER_PLAN',
1037
+ submitted_by: 'DISPATCHER',
1038
+ version: 2,
1039
+ },
1040
+ });
1041
+ const prompt = buildReviewPrompt(packet, '# Plan', ' ');
1042
+
1043
+ expect(prompt).not.toContain('Prior Feedback (Must Address)');
1044
+ });
1045
+
1046
+ it('revisionDirective is truncated at 2000 chars', () => {
1047
+ const packet = makePlanPacket({
1048
+ metadata: {
1049
+ packet_id: 'plan-2',
1050
+ timestamp: new Date().toISOString(),
1051
+ phase: 'CONSENSUS_MASTER_PLAN',
1052
+ submitted_by: 'DISPATCHER',
1053
+ version: 2,
1054
+ },
1055
+ });
1056
+ const longDirective = 'A'.repeat(3000);
1057
+ const prompt = buildReviewPrompt(packet, '# Plan', longDirective);
1058
+
1059
+ expect(prompt).toContain('Prior Feedback (Must Address)');
1060
+ expect(prompt).toContain('[TRUNCATED');
1061
+ // Should not contain full 3000 chars of content
1062
+ const directiveSection = prompt.split('Prior Feedback (Must Address)')[1];
1063
+ expect(directiveSection.indexOf('A'.repeat(2001))).toBe(-1);
1064
+ });
1065
+
1066
+ it('revision notice appears in prompt when version > 1 and mentions "prior issues"', () => {
1067
+ const packet = makePlanPacket({
1068
+ metadata: {
1069
+ packet_id: 'plan-3',
1070
+ timestamp: new Date().toISOString(),
1071
+ phase: 'CONSENSUS_MASTER_PLAN',
1072
+ submitted_by: 'DISPATCHER',
1073
+ version: 3,
1074
+ },
1075
+ });
1076
+ const prompt = buildReviewPrompt(packet, '# Plan');
1077
+
1078
+ expect(prompt).toContain('Revision Notice');
1079
+ expect(prompt).toContain('revision 3');
1080
+ expect(prompt).toContain('prior issues');
1081
+ });
1082
+
1083
+ it('revision notice does NOT appear when version = 1', () => {
1084
+ const packet = makePlanPacket();
1085
+ const prompt = buildReviewPrompt(packet);
1086
+
1087
+ expect(prompt).not.toContain('Revision Notice');
1088
+ });
1089
+ });
1090
+
1091
+ // ─── v2.4.2: getArbitrationTrigger Tests ──────────────────
1092
+
1093
+ describe('getArbitrationTrigger (v2.4.2)', () => {
1094
+ it('returns DISAGREEMENT when votes have mixed APPROVE/REJECT', () => {
1095
+ const votes = [
1096
+ makeVote('r1', 'APPROVE', 0.96),
1097
+ makeVote('r2', 'REJECT', 0.5),
1098
+ ];
1099
+ expect(getArbitrationTrigger(votes, 0.48, 0.95)).toBe('DISAGREEMENT');
1100
+ });
1101
+
1102
+ it('returns BORDERLINE_SCORE when weighted_score within 0.10 of threshold', () => {
1103
+ // All same vote (no disagreement), but score within 0.10 of threshold
1104
+ const votes = [
1105
+ makeVote('r1', 'CONDITIONAL', 0.90),
1106
+ makeVote('r2', 'CONDITIONAL', 0.88),
1107
+ ];
1108
+ // weighted_score 0.89, threshold 0.95, 0.89 >= 0.85 -> BORDERLINE
1109
+ expect(getArbitrationTrigger(votes, 0.89, 0.95)).toBe('BORDERLINE_SCORE');
1110
+ });
1111
+
1112
+ it('returns ALL_CONDITIONAL when all votes conditional with high confidence', () => {
1113
+ const votes: ReviewerVote[] = [
1114
+ { ...makeVote('r1', 'CONDITIONAL', 0.94), blocking_issues: [], required_changes: ['Fix A'] },
1115
+ { ...makeVote('r2', 'CONDITIONAL', 0.95), blocking_issues: [], required_changes: ['Fix B'] },
1116
+ ];
1117
+ // Not DISAGREEMENT (all same), not BORDERLINE (0.44 < 0.85)
1118
+ expect(getArbitrationTrigger(votes, 0.44, 0.95)).toBe('ALL_CONDITIONAL');
1119
+ });
1120
+
1121
+ it('returns NONE when no trigger conditions met', () => {
1122
+ const votes = [
1123
+ makeVote('r1', 'REJECT', 0.3),
1124
+ makeVote('r2', 'REJECT', 0.4),
1125
+ ];
1126
+ // Unanimous REJECT, score 0.0, threshold 0.95 => no trigger
1127
+ expect(getArbitrationTrigger(votes, 0.0, 0.95)).toBe('NONE');
1128
+ });
1129
+
1130
+ it('DISAGREEMENT takes priority over BORDERLINE_SCORE', () => {
1131
+ const votes = [
1132
+ makeVote('r1', 'APPROVE', 0.96),
1133
+ makeVote('r2', 'REJECT', 0.5),
1134
+ ];
1135
+ // weighted_score 0.48 is also within 0.10 of the 0.55 threshold (borderline), but DISAGREEMENT fires first
1136
+ expect(getArbitrationTrigger(votes, 0.48, 0.55)).toBe('DISAGREEMENT');
1137
+ });
1138
+ });
1139
+
1140
+ // ─── v2.4.2: Arbitrator Rotation Tests ────────────────────
1141
+
1142
+ describe('arbitrator rotation (v2.4.2)', () => { // Rule under test: a provider that voted REJECT should not also act as arbitrator
1143
+ it('arbitrator rotates to OpenAI when default Gemini is a dissenter', () => {
1144
+ const runner = new ConsensusRunner({
1145
+ mode: 'independent',
1146
+ minReviewers: 2,
1147
+ threshold: 0.95,
1148
+ quorum: 2,
1149
+ projectDir: '/tmp/test',
1150
+ enableArbitration: true,
1151
+ arbitratorProvider: { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.2 },
1152
+ reviewerProviders: [
1153
+ { provider: 'openai', model: 'gpt-4.1', temperature: 0.3 },
1154
+ { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.3 },
1155
+ ],
1156
+ });
1157
+
1158
+ // callArbitrator is private and performs the rotation internally; exercising it
1159
+ // would require mocking the provider adapter. So this test only checks that a
1160
+ // runner with this configuration can be constructed, and then re-creates the
1161
+ // rotation rule locally below. NOTE(review): the runner's own rotation code never runs here.
1162
+ expect(runner).toBeDefined();
1163
+
1164
+ // Re-create the dissenter detection: every provider whose vote is REJECT counts as a dissenter
1165
+ const votes = [
1166
+ makeVote('r1', 'APPROVE', 0.96),
1167
+ { ...makeVote('r2', 'REJECT', 0.5), provider: 'gemini' },
1168
+ ];
1169
+ const dissentingProviders = new Set(
1170
+ votes.filter(v => v.vote === 'REJECT').map(v => v.provider),
1171
+ );
1172
+ // Default arbitrator (gemini) IS a dissenter
1173
+ expect(dissentingProviders.has('gemini')).toBe(true);
1174
+
1175
+ // Rotation should pick openai (first in fallback order that's configured & not dissenting)
1176
+ const configuredProviders = new Set(['openai', 'gemini']);
1177
+ const ARBITRATOR_FALLBACK_ORDER = ['openai', 'grok', 'gemini']; // NOTE(review): local copy — confirm it matches the order used by ConsensusRunner
1178
+ const alternate = ARBITRATOR_FALLBACK_ORDER.find(
1179
+ p => !dissentingProviders.has(p) && configuredProviders.has(p),
1180
+ );
1181
+ expect(alternate).toBe('openai');
1182
+ });
1183
+
1184
+ it('arbitrator keeps default when default is NOT a dissenter', () => {
1185
+ const votes = [
1186
+ { ...makeVote('r1', 'APPROVE', 0.96), provider: 'openai' },
1187
+ { ...makeVote('r2', 'REJECT', 0.5), provider: 'openai' },
1188
+ ];
1189
+ const dissentingProviders = new Set(
1190
+ votes.filter(v => v.vote === 'REJECT').map(v => v.provider),
1191
+ );
1192
+ // The only REJECT vote comes from openai, so the default arbitrator (gemini) is NOT a dissenter
1193
+ expect(dissentingProviders.has('gemini')).toBe(false);
1194
+ });
1195
+ });
1196
+
1197
+ // ─── v2.4.2: Escalation Tests ─────────────────────────────
1198
+
1199
+ describe('escalation (v2.4.2)', () => { // Documents when a third (tie-breaker) reviewer is expected to be added
1200
+ it('escalation would add 3rd reviewer at version >= 3 when only 2 providers', () => {
1201
+ // Re-creates the escalation rule locally: at v3+ with only 2 reviewer providers, a 3rd is picked from a preference order
1202
+ const providers = [
1203
+ { provider: 'openai', model: 'gpt-4.1', temperature: 0.3 },
1204
+ { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.3 },
1205
+ ];
1206
+ const version = 3;
1207
+
1208
+ if (version >= 3 && providers.length < 3) { // NOTE(review): always true for the constants above; if it were false the expect below would be skipped silently
1209
+ const existingNames = new Set(providers.map(p => p.provider));
1210
+ const candidates = new Set(['openai', 'gemini', 'grok']); // simulated config
1211
+ const PREFERRED_ORDER = ['grok', 'openai', 'gemini']; // NOTE(review): local copy of the preference order — confirm it matches production
1212
+ const tieBreaker = PREFERRED_ORDER.find(p => candidates.has(p) && !existingNames.has(p));
1213
+
1214
+ expect(tieBreaker).toBe('grok');
1215
+ }
1216
+ });
1217
+
1218
+ it('escalation does not add reviewer when already 3+ providers', () => {
1219
+ const providers = [
1220
+ { provider: 'openai', model: 'gpt-4.1', temperature: 0.3 },
1221
+ { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.3 },
1222
+ { provider: 'grok', model: 'grok-3', temperature: 0.3 },
1223
+ ];
1224
+ // With 3 providers already, no escalation needed. NOTE(review): only the local array length is asserted; no escalation code runs.
1225
+ expect(providers.length >= 3).toBe(true);
1226
+ });
1227
+
1228
+ it('escalation does not add reviewer at version < 3', () => {
1229
+ const version = 2; // NOTE(review): asserts on a local constant only; no escalation code runs
1230
+ expect(version >= 3).toBe(false);
1231
+ });
1232
+ });
1233
+
1234
+ // ─── v2.4.2: No Forced-Approval Tests ─────────────────────
1235
+
1236
+ describe('no forced-approval (v2.4.2)', () => { // Guards against a consensus packet being reported as approved when reviewers rejected
1237
+ it('high version still returns honest REJECTED status (no escape hatch)', () => {
1238
+ const votes = [
1239
+ makeVote('r1', 'REJECT', 0.3),
1240
+ makeVote('r2', 'REJECT', 0.4),
1241
+ ];
1242
+ const packet = buildConsensusPacket({
1243
+ planPacketRef: makeRef(),
1244
+ votes,
1245
+ rules: { threshold: 0.95, quorum: 2, min_reviewers: 2 },
1246
+ });
1247
+
1248
+ // Two REJECT votes must yield an honest REJECTED packet. NOTE(review): no version/iteration is passed above, so the "high version" of the title is not actually simulated.
1249
+ expect(packet.final_status).toBe('REJECTED');
1250
+ expect(packet.consensus_result.approved).toBe(false);
1251
+ });
1252
+ });
1253
+
1254
+ // ─── v2.4.3: parseArbitratorResponse Tests ─────────────────
1255
+
1256
+ describe('parseArbitratorResponse (v2.4.3)', () => { // Covers fenced JSON, bare JSON, free-form text and unparseable input
1257
+ it('parses JSON in code fence with valid schema -> approved: true', () => {
1258
+ const raw = '```json\n' + JSON.stringify({
1259
+ approved: true,
1260
+ reasoning: 'Plan is solid with minor amendments',
1261
+ suggestedChanges: ['Add error handling to /api/users'],
1262
+ }) + '\n```';
1263
+ const result = parseArbitratorResponse(raw); // raw is the JSON payload wrapped in a markdown json code fence
1264
+
1265
+ expect(result.approved).toBe(true);
1266
+ expect(result.reasoning).toBe('Plan is solid with minor amendments');
1267
+ expect(result.suggestedChanges).toEqual(['Add error handling to /api/users']);
1268
+ });
1269
+
1270
+ it('parses plain JSON without code fence -> approved: true', () => {
1271
+ const raw = JSON.stringify({
1272
+ approved: true,
1273
+ reasoning: 'Acceptable plan',
1274
+ suggestedChanges: [],
1275
+ });
1276
+ const result = parseArbitratorResponse(raw); // raw is bare JSON with no surrounding fence
1277
+
1278
+ expect(result.approved).toBe(true);
1279
+ expect(result.reasoning).toBe('Acceptable plan');
1280
+ expect(result.suggestedChanges).toEqual([]);
1281
+ });
1282
+
1283
+ it('parses free-form text "approved: true" -> approved: true', () => {
1284
+ const raw = 'After reviewing the plan, I determine approved: true. The plan addresses all major concerns.';
1285
+ const result = parseArbitratorResponse(raw); // no JSON at all: the verdict has to be read from the prose
1286
+
1287
+ expect(result.approved).toBe(true);
1288
+ });
1289
+
1290
+ it('parses free-form text "APPROVE" -> approved: true, "REJECT" -> approved: false', () => {
1291
+ const approveRaw = 'I APPROVE this plan based on the evidence presented.';
1292
+ expect(parseArbitratorResponse(approveRaw).approved).toBe(true); // bare keyword APPROVE inside a sentence
1293
+
1294
+ const rejectRaw = 'I must REJECT this plan due to fundamental issues.';
1295
+ expect(parseArbitratorResponse(rejectRaw).approved).toBe(false); // bare keyword REJECT inside a sentence
1296
+ });
1297
+
1298
+ it('garbage text -> approved: false (safe default)', () => {
1299
+ const raw = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.';
1300
+ const result = parseArbitratorResponse(raw);
1301
+
1302
+ expect(result.approved).toBe(false); // nothing recognizable in the text -> must not approve
1303
+ expect(result.reasoning).toBe(raw.slice(0, 2000)); // reasoning is expected to be the raw text capped at 2000 chars (raw is shorter here, so unchanged)
1304
+ });
1305
+
1306
+ it('schema with both suggestedChanges and suggested_changes -> merged, no error', () => {
1307
+ const raw = JSON.stringify({
1308
+ approved: true,
1309
+ reasoning: 'Good plan',
1310
+ suggestedChanges: ['Fix A'],
1311
+ suggested_changes: ['Fix B'],
1312
+ });
1313
+ const result = parseArbitratorResponse(raw);
1314
+
1315
+ expect(result.approved).toBe(true);
1316
+ expect(result.suggestedChanges).toContain('Fix A');
1317
+ expect(result.suggestedChanges).toContain('Fix B');
1318
+ expect(result.suggestedChanges).toHaveLength(2); // exactly two entries: one from each spelling of the key
1319
+ });
1320
+ });
1321
+
1322
+ // ─── v2.4.3: Gate ARBITRATED Status Tests ───────────────────
1323
+
1324
+ describe('gate engine respects ARBITRATED (v2.4.3)', () => { // Checks how evaluateGate treats a stored finalStatus for CONSENSUS_MASTER_PLAN
1325
+ it('gate passes when finalStatus=ARBITRATED even with score below threshold', async () => {
1326
+ // Simulate a pipeline state in which the phase handler already stored an ARBITRATED gate result with low scores
1327
+ const { createGateEngine } = await import('../../src/pipeline/gate-engine.js');
1328
+ const gateEngine = createGateEngine();
1329
+ const pipeline = {
1330
+ pipelinePhase: 'CONSENSUS_MASTER_PLAN' as const,
1331
+ artifacts: [
1332
+ { id: 'c1', type: 'consensus', phase: 'CONSENSUS_MASTER_PLAN', path: '', sha256: '', version: 1, content_type: 'json', timestamp: new Date().toISOString() },
1333
+ ],
1334
+ gateResults: {
1335
+ CONSENSUS_MASTER_PLAN: {
1336
+ phase: 'CONSENSUS_MASTER_PLAN' as const,
1337
+ pass: true,
1338
+ score: 0.60, // below 0.95 threshold
1339
+ blockers: [],
1340
+ missingArtifacts: [],
1341
+ failedChecks: [],
1342
+ consensusScore: 0.50,
1343
+ finalStatus: 'ARBITRATED', // v2.4.3: should override threshold check
1344
+ timestamp: new Date().toISOString(),
1345
+ },
1346
+ },
1347
+ gateChecks: {},
1348
+ recoveryCount: 0,
1349
+ maxRecoveryIterations: 6,
1350
+ skillUsageEvents: [],
1351
+ latestRepoSnapshot: null,
1352
+ };
1353
+
1354
+ const result = gateEngine.evaluateGate('CONSENSUS_MASTER_PLAN', pipeline as any); // hand-built partial state, hence the cast to any
1355
+ // Gate should pass because ARBITRATED overrides the score-vs-threshold check
1356
+ expect(result.pass).toBe(true);
1357
+ });
1358
+
1359
+ it('gate fails when finalStatus=REJECTED and score below threshold', async () => {
1360
+ const { createGateEngine } = await import('../../src/pipeline/gate-engine.js');
1361
+ const gateEngine = createGateEngine();
1362
+ const pipeline = {
1363
+ pipelinePhase: 'CONSENSUS_MASTER_PLAN' as const,
1364
+ artifacts: [
1365
+ { id: 'c1', type: 'consensus', phase: 'CONSENSUS_MASTER_PLAN', path: '', sha256: '', version: 1, content_type: 'json', timestamp: new Date().toISOString() },
1366
+ ],
1367
+ gateResults: {
1368
+ CONSENSUS_MASTER_PLAN: {
1369
+ phase: 'CONSENSUS_MASTER_PLAN' as const,
1370
+ pass: false,
1371
+ score: 0.60,
1372
+ blockers: [],
1373
+ missingArtifacts: [],
1374
+ failedChecks: [],
1375
+ consensusScore: 0.50,
1376
+ finalStatus: 'REJECTED', // NOT arbitrated
1377
+ timestamp: new Date().toISOString(),
1378
+ },
1379
+ },
1380
+ gateChecks: {},
1381
+ recoveryCount: 0,
1382
+ maxRecoveryIterations: 6,
1383
+ skillUsageEvents: [],
1384
+ latestRepoSnapshot: null,
1385
+ };
1386
+
1387
+ const result = gateEngine.evaluateGate('CONSENSUS_MASTER_PLAN', pipeline as any); // hand-built partial state, hence the cast to any
1388
+ // Gate should fail because REJECTED + below threshold; a blocker mentioning 'below threshold' is expected
1389
+ expect(result.pass).toBe(false);
1390
+ expect(result.blockers.some(b => b.includes('below threshold'))).toBe(true);
1391
+ });
1392
+
1393
+ it('mergeGateResult preserves finalStatus from phase handler', () => {
1394
+ // Simulate the orchestrator merge logic inline. NOTE(review): mergeGateResult itself is never called here, so this cannot catch regressions in it.
1395
+ const pipeline = {
1396
+ gateResults: {
1397
+ CONSENSUS_MASTER_PLAN: {
1398
+ phase: 'CONSENSUS_MASTER_PLAN' as const,
1399
+ pass: true,
1400
+ score: 0.80,
1401
+ blockers: [],
1402
+ missingArtifacts: [],
1403
+ failedChecks: [],
1404
+ consensusScore: 0.50,
1405
+ finalStatus: 'ARBITRATED',
1406
+ timestamp: '2024-01-01T00:00:00Z',
1407
+ },
1408
+ },
1409
+ } as any;
1410
+
1411
+ const newGateResult = {
1412
+ phase: 'CONSENSUS_MASTER_PLAN' as const,
1413
+ pass: true,
1414
+ blockers: [],
1415
+ missingArtifacts: [],
1416
+ failedChecks: [],
1417
+ timestamp: '2024-01-01T00:00:01Z',
1418
+ };
1419
+
1420
+ // Simulate mergeGateResult logic: values already stored by the phase handler win over the fresh gate result
1421
+ const existing = pipeline.gateResults['CONSENSUS_MASTER_PLAN'];
1422
+ pipeline.gateResults['CONSENSUS_MASTER_PLAN'] = {
1423
+ ...newGateResult,
1424
+ score: existing.score ?? newGateResult.score, // NOTE(review): newGateResult has no score/consensusScore/finalStatus keys, so each fallback is undefined — confirm this type-checks
1425
+ consensusScore: existing.consensusScore ?? newGateResult.consensusScore,
1426
+ finalStatus: existing.finalStatus ?? newGateResult.finalStatus,
1427
+ };
1428
+
1429
+ expect(pipeline.gateResults['CONSENSUS_MASTER_PLAN'].finalStatus).toBe('ARBITRATED');
1430
+ expect(pipeline.gateResults['CONSENSUS_MASTER_PLAN'].score).toBe(0.80);
1431
+ });
1432
+ });
1433
+
1434
+ // ─── v2.4.4: Version-increment / Arbitration Key Tests ────────
1435
+
1436
+ describe('version-increment arbitration key (v2.4.4)', () => { // Keys below have the form PHASE@vN with N = recoveryCount + 1
1437
+ it('recoveryCount=0 -> version=1 -> arbitration key CONSENSUS_ARCHITECTURE@v1', () => {
1438
+ const recoveryCount = 0; // NOTE(review): pure arithmetic on local constants; no production code is exercised
1439
+ const version = recoveryCount + 1;
1440
+ expect(version).toBe(1);
1441
+
1442
+ const key = `CONSENSUS_ARCHITECTURE@v${version}`;
1443
+ expect(key).toBe('CONSENSUS_ARCHITECTURE@v1');
1444
+ });
1445
+
1446
+ it('recoveryCount=1 -> version=2 -> arbitration key CONSENSUS_ARCHITECTURE@v2', () => {
1447
+ const recoveryCount = 1; // NOTE(review): pure arithmetic on local constants; no production code is exercised
1448
+ const version = recoveryCount + 1;
1449
+ expect(version).toBe(2);
1450
+
1451
+ const key = `CONSENSUS_ARCHITECTURE@v${version}`;
1452
+ expect(key).toBe('CONSENSUS_ARCHITECTURE@v2');
1453
+ });
1454
+
1455
+ it('arbitrationAttempted Set does NOT block second run with different version', () => {
1456
+ const runner = new ConsensusRunner({
1457
+ mode: 'independent',
1458
+ minReviewers: 2,
1459
+ threshold: 0.95,
1460
+ quorum: 2,
1461
+ projectDir: '/tmp/test',
1462
+ enableArbitration: true,
1463
+ arbitratorProvider: { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.2 },
1464
+ });
1465
+
1466
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1467
+ const attempted = (runner as any).arbitrationAttempted as Set<string>; // presumably a private field of the runner, hence the any cast — TODO confirm
1468
+
1469
+ // First run: recoveryCount=0, version=1 (the test records the key itself; the runner is not invoked)
1470
+ attempted.add('CONSENSUS_ARCHITECTURE@v1');
1471
+ expect(attempted.has('CONSENSUS_ARCHITECTURE@v1')).toBe(true);
1472
+
1473
+ // Second run after recovery: recoveryCount=1, version=2
1474
+ // Its key differs from the recorded one, so it should NOT be blocked by the Set
1475
+ expect(attempted.has('CONSENSUS_ARCHITECTURE@v2')).toBe(false);
1476
+
1477
+ // Same for CONSENSUS_ROLE_PLANS
1478
+ attempted.add('CONSENSUS_ROLE_PLANS@v1');
1479
+ expect(attempted.has('CONSENSUS_ROLE_PLANS@v1')).toBe(true);
1480
+ expect(attempted.has('CONSENSUS_ROLE_PLANS@v2')).toBe(false);
1481
+ });
1482
+
1483
+ it('version=1 (default) blocks retry when version not incremented', () => {
1484
+ const runner = new ConsensusRunner({
1485
+ mode: 'independent',
1486
+ minReviewers: 2,
1487
+ threshold: 0.95,
1488
+ quorum: 2,
1489
+ projectDir: '/tmp/test',
1490
+ enableArbitration: true,
1491
+ arbitratorProvider: { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.2 },
1492
+ });
1493
+
1494
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1495
+ const attempted = (runner as any).arbitrationAttempted as Set<string>; // presumably a private field of the runner, hence the any cast — TODO confirm
1496
+
1497
+ // If version stays at 1 (bug: missing recoveryCount passthrough),
1498
+ // the Set WILL block retry, because the same key is produced twice
1499
+ attempted.add('CONSENSUS_ARCHITECTURE@v1');
1500
+ expect(attempted.has('CONSENSUS_ARCHITECTURE@v1')).toBe(true);
1501
+ // This is the bug scenario: version=1 again -> blocked. NOTE(review): the Set is filled by the test itself, so this documents Set semantics rather than runner behavior.
1502
+ const secondKey = `CONSENSUS_ARCHITECTURE@v${1}`;
1503
+ expect(attempted.has(secondKey)).toBe(true);
1504
+ });
1505
+ });
1506
+
1507
+ // ─── v2.4.3: Stale Master Plan Test ─────────────────────────
1508
+
1509
+ describe('latest master plan artifact (v2.4.3)', () => { // Shows why the newest master_plan must be found by searching from the end of the list
1510
+ it('reverse-find picks latest master_plan not stale v1', () => {
1511
+ const artifacts = [
1512
+ { id: 'mp1', type: 'master_plan', phase: 'INTAKE', version: 1, timestamp: '2024-01-01T00:00:00Z' },
1513
+ { id: 'other', type: 'constitution', phase: 'INTAKE', version: 1, timestamp: '2024-01-01T00:00:01Z' },
1514
+ { id: 'mp2', type: 'master_plan', phase: 'INTAKE', version: 2, timestamp: '2024-01-02T00:00:00Z' },
1515
+ ];
1516
+
1517
+ // Simulates the fix: [...artifacts].reverse().find() — reversing a copy keeps artifacts itself in its original order for the check further down
1518
+ const latest = [...artifacts].reverse().find((a) => a.type === 'master_plan');
1519
+ expect(latest).toBeDefined();
1520
+ expect(latest!.id).toBe('mp2');
1521
+ expect(latest!.version).toBe(2);
1522
+
1523
+ // Verify old .find() would have returned stale v1 (the first match in insertion order)
1524
+ const stale = artifacts.find((a) => a.type === 'master_plan');
1525
+ expect(stale!.id).toBe('mp1');
1526
+ expect(stale!.version).toBe(1);
1527
+ });
1528
+ });
206
1529
  });