popeye-cli 2.2.0 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323)
  1. package/dist/adapters/gemini.d.ts +14 -0
  2. package/dist/adapters/gemini.d.ts.map +1 -1
  3. package/dist/adapters/gemini.js +41 -6
  4. package/dist/adapters/gemini.js.map +1 -1
  5. package/dist/adapters/grok.d.ts +14 -0
  6. package/dist/adapters/grok.d.ts.map +1 -1
  7. package/dist/adapters/grok.js +42 -6
  8. package/dist/adapters/grok.js.map +1 -1
  9. package/dist/adapters/openai.d.ts +10 -0
  10. package/dist/adapters/openai.d.ts.map +1 -1
  11. package/dist/adapters/openai.js +44 -5
  12. package/dist/adapters/openai.js.map +1 -1
  13. package/dist/cli/commands/create.js +1 -1
  14. package/dist/cli/commands/create.js.map +1 -1
  15. package/dist/cli/interactive.d.ts.map +1 -1
  16. package/dist/cli/interactive.js +324 -20
  17. package/dist/cli/interactive.js.map +1 -1
  18. package/dist/generators/all.d.ts.map +1 -1
  19. package/dist/generators/all.js +3 -2
  20. package/dist/generators/all.js.map +1 -1
  21. package/dist/generators/doc-parser.d.ts +21 -6
  22. package/dist/generators/doc-parser.d.ts.map +1 -1
  23. package/dist/generators/doc-parser.js +55 -4
  24. package/dist/generators/doc-parser.js.map +1 -1
  25. package/dist/generators/templates/fullstack.js +1 -1
  26. package/dist/generators/templates/website-components.js +1 -1
  27. package/dist/generators/templates/website-components.js.map +1 -1
  28. package/dist/generators/templates/website-config.d.ts +4 -1
  29. package/dist/generators/templates/website-config.d.ts.map +1 -1
  30. package/dist/generators/templates/website-config.js +17 -11
  31. package/dist/generators/templates/website-config.js.map +1 -1
  32. package/dist/generators/templates/website-conversion.js +1 -1
  33. package/dist/generators/templates/website-conversion.js.map +1 -1
  34. package/dist/generators/templates/website-landing.js +1 -1
  35. package/dist/generators/templates/website-landing.js.map +1 -1
  36. package/dist/generators/templates/website-layout.d.ts +36 -4
  37. package/dist/generators/templates/website-layout.d.ts.map +1 -1
  38. package/dist/generators/templates/website-layout.js +466 -23
  39. package/dist/generators/templates/website-layout.js.map +1 -1
  40. package/dist/generators/templates/website-pricing.js +1 -1
  41. package/dist/generators/templates/website-pricing.js.map +1 -1
  42. package/dist/generators/templates/website-sections.js +1 -1
  43. package/dist/generators/templates/website-sections.js.map +1 -1
  44. package/dist/generators/templates/website-seo.d.ts.map +1 -1
  45. package/dist/generators/templates/website-seo.js +4 -1
  46. package/dist/generators/templates/website-seo.js.map +1 -1
  47. package/dist/generators/templates/website.d.ts +1 -1
  48. package/dist/generators/templates/website.d.ts.map +1 -1
  49. package/dist/generators/templates/website.js +1 -1
  50. package/dist/generators/templates/website.js.map +1 -1
  51. package/dist/generators/website-content-ai.d.ts +52 -0
  52. package/dist/generators/website-content-ai.d.ts.map +1 -0
  53. package/dist/generators/website-content-ai.js +141 -0
  54. package/dist/generators/website-content-ai.js.map +1 -0
  55. package/dist/generators/website-content-scanner.d.ts +1 -1
  56. package/dist/generators/website-content-scanner.d.ts.map +1 -1
  57. package/dist/generators/website-content-scanner.js +98 -1
  58. package/dist/generators/website-content-scanner.js.map +1 -1
  59. package/dist/generators/website-context.d.ts +34 -1
  60. package/dist/generators/website-context.d.ts.map +1 -1
  61. package/dist/generators/website-context.js +131 -9
  62. package/dist/generators/website-context.js.map +1 -1
  63. package/dist/generators/website-debug.d.ts +12 -0
  64. package/dist/generators/website-debug.d.ts.map +1 -1
  65. package/dist/generators/website-debug.js +16 -0
  66. package/dist/generators/website-debug.js.map +1 -1
  67. package/dist/generators/website.d.ts.map +1 -1
  68. package/dist/generators/website.js +26 -4
  69. package/dist/generators/website.js.map +1 -1
  70. package/dist/pipeline/auto-recovery.d.ts +56 -0
  71. package/dist/pipeline/auto-recovery.d.ts.map +1 -0
  72. package/dist/pipeline/auto-recovery.js +185 -0
  73. package/dist/pipeline/auto-recovery.js.map +1 -0
  74. package/dist/pipeline/change-request.d.ts +39 -0
  75. package/dist/pipeline/change-request.d.ts.map +1 -1
  76. package/dist/pipeline/change-request.js +40 -1
  77. package/dist/pipeline/change-request.js.map +1 -1
  78. package/dist/pipeline/check-runner.d.ts +30 -1
  79. package/dist/pipeline/check-runner.d.ts.map +1 -1
  80. package/dist/pipeline/check-runner.js +122 -1
  81. package/dist/pipeline/check-runner.js.map +1 -1
  82. package/dist/pipeline/command-resolver.d.ts.map +1 -1
  83. package/dist/pipeline/command-resolver.js +33 -2
  84. package/dist/pipeline/command-resolver.js.map +1 -1
  85. package/dist/pipeline/consensus/arbitrator-query.d.ts +22 -0
  86. package/dist/pipeline/consensus/arbitrator-query.d.ts.map +1 -0
  87. package/dist/pipeline/consensus/arbitrator-query.js +70 -0
  88. package/dist/pipeline/consensus/arbitrator-query.js.map +1 -0
  89. package/dist/pipeline/consensus/consensus-runner.d.ts +131 -7
  90. package/dist/pipeline/consensus/consensus-runner.d.ts.map +1 -1
  91. package/dist/pipeline/consensus/consensus-runner.js +809 -35
  92. package/dist/pipeline/consensus/consensus-runner.js.map +1 -1
  93. package/dist/pipeline/cr-lifecycle.d.ts +42 -0
  94. package/dist/pipeline/cr-lifecycle.d.ts.map +1 -0
  95. package/dist/pipeline/cr-lifecycle.js +89 -0
  96. package/dist/pipeline/cr-lifecycle.js.map +1 -0
  97. package/dist/pipeline/gate-engine.d.ts +1 -0
  98. package/dist/pipeline/gate-engine.d.ts.map +1 -1
  99. package/dist/pipeline/gate-engine.js +26 -7
  100. package/dist/pipeline/gate-engine.js.map +1 -1
  101. package/dist/pipeline/orchestrator.d.ts +1 -1
  102. package/dist/pipeline/orchestrator.d.ts.map +1 -1
  103. package/dist/pipeline/orchestrator.js +306 -16
  104. package/dist/pipeline/orchestrator.js.map +1 -1
  105. package/dist/pipeline/packets/consensus-packet-builder.d.ts +15 -4
  106. package/dist/pipeline/packets/consensus-packet-builder.d.ts.map +1 -1
  107. package/dist/pipeline/packets/consensus-packet-builder.js +29 -17
  108. package/dist/pipeline/packets/consensus-packet-builder.js.map +1 -1
  109. package/dist/pipeline/phases/architecture.d.ts.map +1 -1
  110. package/dist/pipeline/phases/architecture.js +5 -3
  111. package/dist/pipeline/phases/architecture.js.map +1 -1
  112. package/dist/pipeline/phases/audit.d.ts.map +1 -1
  113. package/dist/pipeline/phases/audit.js +5 -3
  114. package/dist/pipeline/phases/audit.js.map +1 -1
  115. package/dist/pipeline/phases/consensus-architecture.d.ts.map +1 -1
  116. package/dist/pipeline/phases/consensus-architecture.js +10 -1
  117. package/dist/pipeline/phases/consensus-architecture.js.map +1 -1
  118. package/dist/pipeline/phases/consensus-master-plan.d.ts.map +1 -1
  119. package/dist/pipeline/phases/consensus-master-plan.js +10 -3
  120. package/dist/pipeline/phases/consensus-master-plan.js.map +1 -1
  121. package/dist/pipeline/phases/consensus-role-plans.d.ts.map +1 -1
  122. package/dist/pipeline/phases/consensus-role-plans.js +10 -1
  123. package/dist/pipeline/phases/consensus-role-plans.js.map +1 -1
  124. package/dist/pipeline/phases/done.d.ts.map +1 -1
  125. package/dist/pipeline/phases/done.js +9 -4
  126. package/dist/pipeline/phases/done.js.map +1 -1
  127. package/dist/pipeline/phases/intake.d.ts.map +1 -1
  128. package/dist/pipeline/phases/intake.js +7 -3
  129. package/dist/pipeline/phases/intake.js.map +1 -1
  130. package/dist/pipeline/phases/phase-context.d.ts +2 -0
  131. package/dist/pipeline/phases/phase-context.d.ts.map +1 -1
  132. package/dist/pipeline/phases/phase-context.js +3 -1
  133. package/dist/pipeline/phases/phase-context.js.map +1 -1
  134. package/dist/pipeline/phases/production-gate.d.ts.map +1 -1
  135. package/dist/pipeline/phases/production-gate.js +28 -3
  136. package/dist/pipeline/phases/production-gate.js.map +1 -1
  137. package/dist/pipeline/phases/qa-validation.d.ts.map +1 -1
  138. package/dist/pipeline/phases/qa-validation.js +38 -5
  139. package/dist/pipeline/phases/qa-validation.js.map +1 -1
  140. package/dist/pipeline/phases/recovery-loop.d.ts +2 -0
  141. package/dist/pipeline/phases/recovery-loop.d.ts.map +1 -1
  142. package/dist/pipeline/phases/recovery-loop.js +200 -6
  143. package/dist/pipeline/phases/recovery-loop.js.map +1 -1
  144. package/dist/pipeline/phases/review.d.ts.map +1 -1
  145. package/dist/pipeline/phases/review.js +58 -28
  146. package/dist/pipeline/phases/review.js.map +1 -1
  147. package/dist/pipeline/phases/role-planning.d.ts.map +1 -1
  148. package/dist/pipeline/phases/role-planning.js +18 -2
  149. package/dist/pipeline/phases/role-planning.js.map +1 -1
  150. package/dist/pipeline/phases/stuck.d.ts.map +1 -1
  151. package/dist/pipeline/phases/stuck.js +10 -0
  152. package/dist/pipeline/phases/stuck.js.map +1 -1
  153. package/dist/pipeline/repo-snapshot.d.ts.map +1 -1
  154. package/dist/pipeline/repo-snapshot.js +3 -0
  155. package/dist/pipeline/repo-snapshot.js.map +1 -1
  156. package/dist/pipeline/role-execution-adapter.d.ts +2 -1
  157. package/dist/pipeline/role-execution-adapter.d.ts.map +1 -1
  158. package/dist/pipeline/role-execution-adapter.js +22 -7
  159. package/dist/pipeline/role-execution-adapter.js.map +1 -1
  160. package/dist/pipeline/skill-loader.d.ts +19 -0
  161. package/dist/pipeline/skill-loader.d.ts.map +1 -1
  162. package/dist/pipeline/skill-loader.js +22 -0
  163. package/dist/pipeline/skill-loader.js.map +1 -1
  164. package/dist/pipeline/skills/coverage-gate.d.ts +44 -0
  165. package/dist/pipeline/skills/coverage-gate.d.ts.map +1 -0
  166. package/dist/pipeline/skills/coverage-gate.js +143 -0
  167. package/dist/pipeline/skills/coverage-gate.js.map +1 -0
  168. package/dist/pipeline/skills/usage-registry.d.ts +48 -0
  169. package/dist/pipeline/skills/usage-registry.d.ts.map +1 -0
  170. package/dist/pipeline/skills/usage-registry.js +55 -0
  171. package/dist/pipeline/skills/usage-registry.js.map +1 -0
  172. package/dist/pipeline/strategy-context.d.ts +20 -0
  173. package/dist/pipeline/strategy-context.d.ts.map +1 -0
  174. package/dist/pipeline/strategy-context.js +55 -0
  175. package/dist/pipeline/strategy-context.js.map +1 -0
  176. package/dist/pipeline/type-defs/artifacts.d.ts +25 -5
  177. package/dist/pipeline/type-defs/artifacts.d.ts.map +1 -1
  178. package/dist/pipeline/type-defs/artifacts.js +4 -0
  179. package/dist/pipeline/type-defs/artifacts.js.map +1 -1
  180. package/dist/pipeline/type-defs/audit.d.ts +25 -13
  181. package/dist/pipeline/type-defs/audit.d.ts.map +1 -1
  182. package/dist/pipeline/type-defs/checks.d.ts +18 -8
  183. package/dist/pipeline/type-defs/checks.d.ts.map +1 -1
  184. package/dist/pipeline/type-defs/checks.js +4 -0
  185. package/dist/pipeline/type-defs/checks.js.map +1 -1
  186. package/dist/pipeline/type-defs/packets.d.ts +104 -18
  187. package/dist/pipeline/type-defs/packets.d.ts.map +1 -1
  188. package/dist/pipeline/type-defs/packets.js +17 -1
  189. package/dist/pipeline/type-defs/packets.js.map +1 -1
  190. package/dist/pipeline/type-defs/state.d.ts +160 -16
  191. package/dist/pipeline/type-defs/state.d.ts.map +1 -1
  192. package/dist/pipeline/type-defs/state.js +26 -1
  193. package/dist/pipeline/type-defs/state.js.map +1 -1
  194. package/dist/shared/text-utils.d.ts +23 -0
  195. package/dist/shared/text-utils.d.ts.map +1 -0
  196. package/dist/shared/text-utils.js +66 -0
  197. package/dist/shared/text-utils.js.map +1 -0
  198. package/dist/shared/website-strategy-format.d.ts +18 -0
  199. package/dist/shared/website-strategy-format.d.ts.map +1 -0
  200. package/dist/shared/website-strategy-format.js +47 -0
  201. package/dist/shared/website-strategy-format.js.map +1 -0
  202. package/dist/state/index.d.ts +2 -0
  203. package/dist/state/index.d.ts.map +1 -1
  204. package/dist/state/index.js +57 -8
  205. package/dist/state/index.js.map +1 -1
  206. package/dist/types/consensus.d.ts +1 -0
  207. package/dist/types/consensus.d.ts.map +1 -1
  208. package/dist/types/consensus.js.map +1 -1
  209. package/dist/types/website-strategy.d.ts +1 -1
  210. package/dist/types/workflow.d.ts +447 -0
  211. package/dist/types/workflow.d.ts.map +1 -1
  212. package/dist/types/workflow.js +3 -0
  213. package/dist/types/workflow.js.map +1 -1
  214. package/dist/upgrade/handlers.d.ts.map +1 -1
  215. package/dist/upgrade/handlers.js +6 -3
  216. package/dist/upgrade/handlers.js.map +1 -1
  217. package/dist/workflow/consensus.d.ts.map +1 -1
  218. package/dist/workflow/consensus.js +1 -0
  219. package/dist/workflow/consensus.js.map +1 -1
  220. package/dist/workflow/website-strategy.d.ts.map +1 -1
  221. package/dist/workflow/website-strategy.js +2 -29
  222. package/dist/workflow/website-strategy.js.map +1 -1
  223. package/dist/workflow/website-updater.d.ts.map +1 -1
  224. package/dist/workflow/website-updater.js +3 -2
  225. package/dist/workflow/website-updater.js.map +1 -1
  226. package/package.json +1 -1
  227. package/src/adapters/gemini.ts +51 -6
  228. package/src/adapters/grok.ts +51 -6
  229. package/src/adapters/openai.ts +53 -5
  230. package/src/cli/commands/create.ts +1 -1
  231. package/src/cli/interactive.ts +333 -19
  232. package/src/generators/all.ts +3 -2
  233. package/src/generators/doc-parser.ts +75 -15
  234. package/src/generators/templates/fullstack.ts +1 -1
  235. package/src/generators/templates/website-components.ts +1 -1
  236. package/src/generators/templates/website-config.ts +23 -11
  237. package/src/generators/templates/website-conversion.ts +1 -1
  238. package/src/generators/templates/website-landing.ts +1 -1
  239. package/src/generators/templates/website-layout.ts +491 -23
  240. package/src/generators/templates/website-pricing.ts +1 -1
  241. package/src/generators/templates/website-sections.ts +1 -1
  242. package/src/generators/templates/website-seo.ts +4 -1
  243. package/src/generators/templates/website.ts +3 -0
  244. package/src/generators/website-content-ai.ts +186 -0
  245. package/src/generators/website-content-scanner.ts +113 -1
  246. package/src/generators/website-context.ts +151 -12
  247. package/src/generators/website-debug.ts +26 -0
  248. package/src/generators/website.ts +28 -3
  249. package/src/pipeline/auto-recovery.ts +283 -0
  250. package/src/pipeline/change-request.ts +63 -1
  251. package/src/pipeline/check-runner.ts +141 -2
  252. package/src/pipeline/command-resolver.ts +34 -2
  253. package/src/pipeline/consensus/arbitrator-query.ts +101 -0
  254. package/src/pipeline/consensus/consensus-runner.ts +1099 -42
  255. package/src/pipeline/cr-lifecycle.ts +103 -0
  256. package/src/pipeline/gate-engine.ts +35 -7
  257. package/src/pipeline/orchestrator.ts +361 -16
  258. package/src/pipeline/packets/consensus-packet-builder.ts +44 -18
  259. package/src/pipeline/phases/architecture.ts +6 -3
  260. package/src/pipeline/phases/audit.ts +6 -3
  261. package/src/pipeline/phases/consensus-architecture.ts +10 -1
  262. package/src/pipeline/phases/consensus-master-plan.ts +10 -3
  263. package/src/pipeline/phases/consensus-role-plans.ts +10 -1
  264. package/src/pipeline/phases/done.ts +15 -4
  265. package/src/pipeline/phases/intake.ts +7 -3
  266. package/src/pipeline/phases/phase-context.ts +6 -1
  267. package/src/pipeline/phases/production-gate.ts +41 -3
  268. package/src/pipeline/phases/qa-validation.ts +51 -5
  269. package/src/pipeline/phases/recovery-loop.ts +229 -7
  270. package/src/pipeline/phases/review.ts +73 -30
  271. package/src/pipeline/phases/role-planning.ts +21 -2
  272. package/src/pipeline/phases/stuck.ts +10 -0
  273. package/src/pipeline/repo-snapshot.ts +3 -0
  274. package/src/pipeline/role-execution-adapter.ts +30 -4
  275. package/src/pipeline/skill-loader.ts +33 -0
  276. package/src/pipeline/skills/coverage-gate.ts +199 -0
  277. package/src/pipeline/skills/usage-registry.ts +87 -0
  278. package/src/pipeline/strategy-context.ts +60 -0
  279. package/src/pipeline/type-defs/artifacts.ts +4 -0
  280. package/src/pipeline/type-defs/checks.ts +4 -0
  281. package/src/pipeline/type-defs/packets.ts +18 -1
  282. package/src/pipeline/type-defs/state.ts +26 -1
  283. package/src/shared/text-utils.ts +70 -0
  284. package/src/shared/website-strategy-format.ts +56 -0
  285. package/src/state/index.ts +60 -8
  286. package/src/types/consensus.ts +1 -0
  287. package/src/types/workflow.ts +6 -0
  288. package/src/upgrade/handlers.ts +9 -3
  289. package/src/workflow/consensus.ts +1 -0
  290. package/src/workflow/website-strategy.ts +2 -36
  291. package/src/workflow/website-updater.ts +4 -2
  292. package/tests/adapters/gemini.test.ts +165 -0
  293. package/tests/adapters/grok.test.ts +137 -0
  294. package/tests/adapters/openai.test.ts +128 -0
  295. package/tests/generators/doc-parser.test.ts +88 -9
  296. package/tests/generators/quality-gate.test.ts +19 -3
  297. package/tests/generators/website-components.test.ts +34 -0
  298. package/tests/generators/website-content-ai.test.ts +308 -0
  299. package/tests/generators/website-content-scanner.test.ts +86 -0
  300. package/tests/generators/website-context.test.ts +3 -2
  301. package/tests/integration/smokestack-scaffold.test.ts +385 -0
  302. package/tests/pipeline/auto-recovery.test.ts +337 -0
  303. package/tests/pipeline/change-request.test.ts +70 -0
  304. package/tests/pipeline/command-resolver.test.ts +42 -0
  305. package/tests/pipeline/consensus/arbitrator-query.test.ts +107 -0
  306. package/tests/pipeline/consensus-runner.test.ts +1333 -10
  307. package/tests/pipeline/consensus-scoring.test.ts +602 -18
  308. package/tests/pipeline/gate-engine.test.ts +34 -0
  309. package/tests/pipeline/install-check.test.ts +261 -0
  310. package/tests/pipeline/orchestrator.test.ts +1506 -15
  311. package/tests/pipeline/packets/builders.test.ts +29 -6
  312. package/tests/pipeline/phases/role-planning.strategy.test.ts +204 -0
  313. package/tests/pipeline/pipeline-persistence.test.ts +230 -0
  314. package/tests/pipeline/recovery-loop-guidance.test.ts +280 -0
  315. package/tests/pipeline/role-execution-adapter.test.ts +88 -0
  316. package/tests/pipeline/skills/coverage-gate.test.ts +370 -0
  317. package/tests/pipeline/skills/usage-registry.test.ts +114 -0
  318. package/tests/pipeline/strategy-context.test.ts +148 -0
  319. package/tests/shared/text-utils.test.ts +155 -0
  320. package/tests/state/progress-analysis.test.ts +375 -0
  321. package/tests/upgrade/handlers.test.ts +33 -2
  322. package/tests/workflow/consensus.test.ts +6 -0
  323. package/tsconfig.json +1 -1
@@ -6,9 +6,16 @@
6
6
  * 1. Independent Review (DEFAULT): N reviewers review simultaneously,
7
7
  * no reviewer sees other reviewers' output.
8
8
  * 2. Iterative Consensus (optional): for recovery plan iteration.
9
+ *
10
+ * v2.1: Vote normalization pipeline, tag reclassification, hard-blocker
11
+ * detection, config-driven arbitration, reviewer rubric.
9
12
  */
10
13
 
11
14
  import { createHash } from 'node:crypto';
15
+ import logging from 'node:console';
16
+ import { existsSync, readFileSync } from 'node:fs';
17
+ import { resolve } from 'node:path';
18
+ import { z } from 'zod';
12
19
 
13
20
  import type {
14
21
  PlanPacket,
@@ -17,11 +24,275 @@ import type {
17
24
  } from '../types.js';
18
25
  import type { GateDefinition } from '../gate-engine.js';
19
26
  import { buildConsensusPacket } from '../packets/consensus-packet-builder.js';
20
- import type { ConsensusRules } from '../packets/consensus-packet-builder.js';
27
+ import type { ConsensusRules, NormalizationSummary } from '../packets/consensus-packet-builder.js';
28
+ import { isNoneVariant } from '../../shared/text-utils.js';
29
+ import { queryProvider } from './arbitrator-query.js';
21
30
 
22
31
  // Re-use existing consensus infrastructure
23
32
  import { iterateUntilConsensus } from '../../workflow/consensus.js';
24
- import type { ConsensusConfig, ConsensusResult } from '../../types/consensus.js';
33
+ import type { ConsensusConfig, ArbitrationResult } from '../../types/consensus.js';
34
+
35
+ const logger = logging;
36
+
37
// ─── Hard Blocker Patterns ───────────────────────────────
// Module-level const so both containsHardBlockerPatterns() and
// the forced-REJECT block in normalizeVoteBlockers() can reference it.
//
// Every pattern is case-insensitive and bounded by \b so it only fires on
// whole words/phrases. None of them use the `g` or `y` flag, so calling
// .test() repeatedly on the shared instances is stateless.

const HARD_BLOCKER_PATTERNS: RegExp[] = [
  /\bsql injection\b/i,
  /\bxss\b/i,
  /\bsecurity vulnerabilit(?:y|ies)\b/i,
  // The copula is optional and, when present, must be followed by whitespace
  // regardless of which verb it is: "build failing", "build is failing",
  // "tests are failing". (Previously the whitespace was attached to "are"
  // only, so "... is failing" could never match.)
  /\b(?:build|tests?)\s+(?:(?:is|are)\s+)?failing\b/i,
  /\bfails?\s+(?:in\s+)?(?:ci|pipeline|compilation)\b/i,
  /\bdata loss\b/i,
  /\bcritical\s+(?:bug|defect|error)\b/i,
];
50
+
51
+ // ─── Tag Classification ──────────────────────────────────
52
+
53
/**
 * Result of bucketing reviewer issues by their leading tag.
 * Untagged entries remember which input list they came from.
 */
interface TagClassification {
  /** Texts that carried a [BLOCKER] prefix (prefix removed). */
  blockers: string[];
  /** Texts that carried a [REQUIRED] prefix (prefix removed). */
  required: string[];
  /** Texts that carried a [SUGGESTION] prefix (prefix removed). */
  suggestions: string[];
  /** Texts with no recognised prefix, paired with their source list. */
  untagged: {
    text: string;
    origin: 'blocking' | 'required' | 'suggestion';
  }[];
}
59
+
60
/**
 * Drop a single leading [BLOCKER] / [REQUIRED] / [SUGGESTION] marker
 * (matched case-insensitively) together with any whitespace right after it.
 * Text without such a marker at position 0 is returned unchanged.
 */
function stripTag(s: string): string {
  const leadingTag = /^\[(BLOCKER|REQUIRED|SUGGESTION)\]\s*/i;
  return s.replace(leadingTag, '');
}
63
+
64
/**
 * Walk the three reviewer issue lists in order (blocking, required,
 * suggestions) and sort every non-blank entry into a bucket chosen by its
 * leading tag. Entries without a recognised tag are kept verbatim (trimmed)
 * and labelled with the list they came from so callers can route them later.
 */
function parseTaggedIssues(
  blockingIssues: string[],
  requiredChanges: string[],
  suggestions: string[],
): TagClassification {
  const blockers: string[] = [];
  const required: string[] = [];
  const taggedSuggestions: string[] = [];
  const untagged: TagClassification['untagged'] = [];

  // Tag prefix → destination bucket, checked in this order.
  const tagBuckets: Array<[RegExp, string[]]> = [
    [/^\[BLOCKER\]/i, blockers],
    [/^\[REQUIRED\]/i, required],
    [/^\[SUGGESTION\]/i, taggedSuggestions],
  ];

  const sources: Array<[string[], 'blocking' | 'required' | 'suggestion']> = [
    [blockingIssues, 'blocking'],
    [requiredChanges, 'required'],
    [suggestions, 'suggestion'],
  ];

  for (const [items, origin] of sources) {
    for (const raw of items) {
      const text = raw.trim();
      if (text.length === 0) continue;

      const match = tagBuckets.find(([prefix]) => prefix.test(text));
      if (match) {
        match[1].push(stripTag(text));
      } else {
        untagged.push({ text, origin });
      }
    }
  }

  return { blockers, required, suggestions: taggedSuggestions, untagged };
}
93
+
94
+ // ─── Normalization Helpers ───────────────────────────────
95
+
96
/** Trim surrounding whitespace, then remove one leading tag marker via stripTag(). */
const cleanText = (s: string): string => {
  const trimmed = s.trim();
  return stripTag(trimmed);
};
97
+
98
/**
 * Clean every entry with cleanText(), discard entries that end up empty or
 * that isNoneVariant() flags, and de-duplicate while keeping the position of
 * each text's first occurrence.
 */
const cleanList = (arr: string[]): string[] => {
  // A Set iterates in insertion order, so it doubles as the ordered output.
  const unique = new Set<string>();
  for (const entry of arr) {
    const text = cleanText(entry);
    if (text && !isNoneVariant(text)) {
      unique.add(text);
    }
  }
  return Array.from(unique);
};
109
+
110
/** True as soon as any issue text matches any entry of HARD_BLOCKER_PATTERNS. */
function containsHardBlockerPatterns(issues: string[]): boolean {
  for (const text of issues) {
    for (const pattern of HARD_BLOCKER_PATTERNS) {
      if (pattern.test(text)) return true;
    }
  }
  return false;
}
113
+
114
+ // ─── Vote Normalization Pipeline ─────────────────────────
115
+
116
/**
 * Normalize votes: pool → classify by tag → detect hard blockers → route by vote → dedup.
 * Called after collecting votes, before buildConsensusPacket().
 * Designed to be idempotent: re-running on its own output should yield the same votes.
 *
 * Per vote:
 *  1. All three issue lists are classified by tag prefix (parseTaggedIssues).
 *  2. Contradiction guard: an APPROVE that carries a [BLOCKER] tag or any
 *     hard-blocker pattern, or a CONDITIONAL that carries a [BLOCKER] tag or a
 *     hard-blocker pattern in blocker-origin text, is rewritten to REJECT and
 *     flagged with `reviewer_inconsistency: true`.
 *  3. Otherwise untagged items are routed according to the vote.
 *
 * @param votes Raw reviewer votes.
 * @returns The normalized votes (same order) plus counters describing what was changed.
 */
export function normalizeVoteBlockers(
  votes: ReviewerVote[],
): { votes: ReviewerVote[]; summary: NormalizationSummary } {
  const summary: NormalizationSummary = {
    tagged_blockers_demoted_to_suggestions: 0,
    tagged_blockers_demoted_to_required: 0,
    untagged_from_blocking_routed_to_required: 0,
    forced_rejects: 0,
  };

  // Shared predicates, hoisted out of the per-vote loop.
  const isHard = (text: string): boolean => HARD_BLOCKER_PATTERNS.some(p => p.test(text));
  const isSoft = (text: string): boolean => !isHard(text);

  const normalized = votes.map((v) => {
    // Step 1: Pool ALL issue lists, classify by tag
    const tagged = parseTaggedIssues(
      v.blocking_issues.filter(i => !isNoneVariant(i)),
      (v.required_changes ?? []),
      v.suggestions,
    );

    // Untagged texts split by the list they were submitted in.
    const untaggedBlocking = tagged.untagged.filter(u => u.origin === 'blocking').map(u => u.text);
    const untaggedRequired = tagged.untagged.filter(u => u.origin === 'required').map(u => u.text);
    const untaggedSuggestion = tagged.untagged.filter(u => u.origin === 'suggestion').map(u => u.text);

    // Step 2: Contradiction guard — scan ALL pooled text for hard blockers
    const hasTaggedBlocker = tagged.blockers.length > 0;
    const allPooledText = [
      ...tagged.blockers, ...tagged.required, ...tagged.suggestions,
      ...tagged.untagged.map(u => u.text),
    ].map(cleanText);
    const hasHardPatternAnywhere = containsHardBlockerPatterns(allPooledText);

    // v2.4.4: Vote-aware contradiction guard
    // Principle:
    //   APPROVE + any hard pattern anywhere = genuinely inconsistent -> force REJECT
    //   CONDITIONAL = force REJECT only if [BLOCKER] tag OR hard pattern in blocker-origin text
    //   REJECT = already reject, no forcing needed
    const blockerOriginText = [
      ...tagged.blockers,
      ...untaggedBlocking,
    ].map(cleanText);
    const hasHardPatternInBlockers = containsHardBlockerPatterns(blockerOriginText);

    const forceReject =
      (v.vote === 'APPROVE' && (hasTaggedBlocker || hasHardPatternAnywhere)) ||
      (v.vote === 'CONDITIONAL' && (hasTaggedBlocker || hasHardPatternInBlockers));

    if (forceReject) {
      summary.forced_rejects++;

      // Debug logging for forced-reject diagnosis
      logger.log(
        `[consensus] Forced REJECT: vote=${v.vote} reviewer=${v.reviewer_id} ` +
        `hasTaggedBlocker=${hasTaggedBlocker} hasHardPatternAnywhere=${hasHardPatternAnywhere} ` +
        `hasHardPatternInBlockers=${hasHardPatternInBlockers}`,
      );

      // Build minimal hard-blocker set: tagged blockers + any text matching patterns
      const hardBlockers = [
        ...tagged.blockers,
        ...tagged.untagged.map(u => u.text).filter(isHard),
        ...tagged.required.filter(isHard),
        ...tagged.suggestions.filter(isHard),
      ];
      // Non-hard items go to required_changes. Every source is filtered with
      // isSoft so an item promoted to hardBlockers above is not reported a
      // second time here (untagged required-origin items used to be copied
      // unfiltered and therefore appeared in both lists).
      const nonHard = [
        ...tagged.required.filter(isSoft),
        ...untaggedRequired.filter(isSoft),
        ...untaggedBlocking.filter(isSoft),
      ];
      // Same rule for suggestions: hard-pattern matches live only in blocking_issues.
      const nonHardSuggestions = [
        ...tagged.suggestions.filter(isSoft),
        ...untaggedSuggestion.filter(isSoft),
      ];
      return {
        ...v,
        vote: 'REJECT' as const,
        blocking_issues: cleanList(hardBlockers),
        required_changes: cleanList(nonHard),
        suggestions: cleanList(nonHardSuggestions),
        reviewer_inconsistency: true,
      };
    }

    // Step 3: Vote-consistent routing for untagged items
    switch (v.vote) {
      case 'APPROVE': {
        // APPROVE = execution-ready. Tagged blockers → suggestions. All untagged → suggestions.
        // NOTE: any tagged blocker on an APPROVE triggers the forced REJECT above,
        // so tagged.blockers is empty whenever this branch runs.
        summary.tagged_blockers_demoted_to_suggestions += tagged.blockers.length;
        return {
          ...v,
          blocking_issues: [] as string[],
          required_changes: cleanList([...tagged.required]),
          suggestions: cleanList([
            ...tagged.suggestions,
            ...tagged.blockers,
            ...tagged.untagged.map(u => u.text),
          ]),
        };
      }
      case 'CONDITIONAL': {
        // CONDITIONAL: tagged blockers → required_changes, untagged-from-blocking → required_changes
        // NOTE: as with APPROVE, tagged.blockers is empty here because a tagged
        // blocker on a CONDITIONAL vote is handled by the forced REJECT above.
        summary.tagged_blockers_demoted_to_required += tagged.blockers.length;
        summary.untagged_from_blocking_routed_to_required += untaggedBlocking.length;
        return {
          ...v,
          blocking_issues: [] as string[],
          required_changes: cleanList([
            ...tagged.required,
            ...tagged.blockers,
            ...untaggedBlocking,
            ...untaggedRequired,
          ]),
          suggestions: cleanList([
            ...tagged.suggestions,
            ...untaggedSuggestion,
          ]),
        };
      }
      case 'REJECT': {
        // REJECT: untagged-from-blocking stays as blockers
        return {
          ...v,
          blocking_issues: cleanList([
            ...tagged.blockers,
            ...untaggedBlocking,
          ]),
          required_changes: cleanList([
            ...tagged.required,
            ...untaggedRequired,
          ]),
          suggestions: cleanList([
            ...tagged.suggestions,
            ...untaggedSuggestion,
          ]),
        };
      }
    }
  });

  return { votes: normalized, summary };
}
259
+
260
+ // ─── Vote Mapping ────────────────────────────────────────
261
+
262
/**
 * Default lower bound of the CONDITIONAL band used by mapVote().
 * Matches adapter rubric: 80-94% = "minor revisions needed".
 */
export const DEFAULT_CONDITIONAL_FLOOR = 0.80;

/**
 * Translate a reviewer confidence score into a structured vote.
 *
 * All three inputs are clamped before comparison: confidence and threshold to
 * [0, 1], and the conditional floor to [0, threshold] so the CONDITIONAL band
 * can never start above the APPROVE bar.
 *
 * @param confidence       Reviewer confidence (0-1).
 * @param threshold        Gate bar; at or above it the vote is APPROVE.
 * @param conditionalFloor At or above it (but below threshold) the vote is CONDITIONAL.
 * @returns 'APPROVE', 'CONDITIONAL', or 'REJECT' (anything below the floor).
 */
export function mapVote(
  confidence: number,
  threshold: number,
  conditionalFloor: number = DEFAULT_CONDITIONAL_FLOOR,
): 'APPROVE' | 'CONDITIONAL' | 'REJECT' {
  const clamp = (value: number, upper: number): number => Math.max(0, Math.min(upper, value));

  const score = clamp(confidence, 1);
  const approveAt = clamp(threshold, 1);
  const conditionalAt = clamp(conditionalFloor, approveAt);

  // Comparisons are kept as >= so a NaN input falls through to REJECT.
  return score >= approveAt
    ? 'APPROVE'
    : score >= conditionalAt
      ? 'CONDITIONAL'
      : 'REJECT';
}
285
+
286
+ // ─── Vote Disagreement Detection ─────────────────────────
287
+
288
/**
 * Report whether the reviewers are split, i.e. at least two different
 * `vote` values occur. Zero or one vote can never disagree.
 */
export function hasVoteDisagreement(votes: ReviewerVote[]): boolean {
  if (votes.length < 2) return false;

  const distinct = new Set<ReviewerVote['vote']>();
  for (const ballot of votes) {
    distinct.add(ballot.vote);
  }
  return distinct.size >= 2;
}
25
296
 
26
297
  // ─── Types ───────────────────────────────────────────────
27
298
 
@@ -34,6 +305,14 @@ export interface ConsensusRunnerConfig {
34
305
  consensusConfig?: Partial<ConsensusConfig>;
35
306
  /** Provider configurations for multi-LLM review */
36
307
  reviewerProviders?: ReviewerProviderConfig[];
308
+ /** Arbitrator provider configuration (v2.1) */
309
+ arbitratorProvider?: ReviewerProviderConfig;
310
+ /** Enable arbitration for deadlocked votes (v2.1) */
311
+ enableArbitration?: boolean;
312
+ /** Skill loader for injecting reviewer/arbitrator skills (v2.2.1) */
313
+ skillLoader?: import('../skill-loader.js').SkillLoader;
314
+ /** Skill usage registry for recording usage events (v2.2.1) */
315
+ skillUsageRegistry?: import('../skills/usage-registry.js').SkillUsageRegistry;
37
316
  }
38
317
 
39
318
  export interface ReviewerProviderConfig {
@@ -47,10 +326,83 @@ const DEFAULT_PROVIDERS: ReviewerProviderConfig[] = [
47
326
  { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.3 },
48
327
  ];
49
328
 
329
+ // ─── Plan Content Loader ─────────────────────────────────
330
+
331
+ /** Max plan content chars to embed in prompt (50K ~ safe for all providers). */
332
+ const MAX_PLAN_CONTENT_CHARS = 50_000;
333
+
334
/**
 * Safely load plan content from disk for embedding in a review prompt.
 *
 * - Path traversal guard: the resolved artifact path must be the project
 *   directory itself or live underneath it. The check uses the platform's
 *   path separator so it also works on Windows, where `resolve()` returns
 *   backslash-separated paths.
 * - Read failures (e.g. the path is a directory or is unreadable) are logged
 *   and reported as empty content instead of throwing.
 * - Size cap: content longer than MAX_PLAN_CONTENT_CHARS is cut and a
 *   truncation marker is appended.
 *
 * NOTE(review): symlinks are not resolved, so a link inside the project that
 * points outside it is still followed.
 *
 * @param projectDir - Project root the artifact path is resolved against.
 * @param artifactPath - Plan artifact path (relative or absolute); may be undefined.
 * @returns The (possibly truncated) content and whether truncation happened;
 *          empty content when the artifact is missing, blocked, or unreadable.
 */
export function loadPlanContent(
  projectDir: string,
  artifactPath: string | undefined,
): { content: string; truncated: boolean } {
  if (!artifactPath) {
    logger.warn('[consensus] No master plan path in packet references');
    return { content: '', truncated: false };
  }

  const resolvedProject = resolve(projectDir);
  const fullPath = resolve(projectDir, artifactPath);

  // resolve() emits the platform separator; a filesystem root already ends with it.
  const separator = process.platform === 'win32' ? '\\' : '/';
  const projectPrefix = resolvedProject.endsWith(separator)
    ? resolvedProject
    : resolvedProject + separator;

  // Path traversal guard: resolved path must be inside projectDir
  if (!fullPath.startsWith(projectPrefix) && fullPath !== resolvedProject) {
    logger.warn(`[consensus] Path traversal blocked: ${artifactPath} resolved to ${fullPath}`);
    return { content: '', truncated: false };
  }

  if (!existsSync(fullPath)) {
    logger.warn(`[consensus] Plan artifact not found at ${fullPath}`);
    return { content: '', truncated: false };
  }

  let content: string;
  try {
    content = readFileSync(fullPath, 'utf-8');
  } catch (err: unknown) {
    // existsSync() is also true for directories and unreadable files.
    logger.warn(
      `[consensus] Failed to read plan artifact at ${fullPath}: ${err instanceof Error ? err.message : 'unknown'}`,
    );
    return { content: '', truncated: false };
  }

  let truncated = false;
  if (content.length > MAX_PLAN_CONTENT_CHARS) {
    content = content.slice(0, MAX_PLAN_CONTENT_CHARS)
      + '\n\n[TRUNCATED -- plan exceeds 50K chars. Review based on visible content.]';
    truncated = true;
    logger.warn(`[consensus] Plan content truncated to ${MAX_PLAN_CONTENT_CHARS} chars`);
  }

  logger.log(`[consensus] Loaded plan content from ${artifactPath} (${content.length} chars${truncated ? ', truncated' : ''})`);
  return { content, truncated };
}
375
+
376
+ // ─── Arbitration Trigger Detection (v2.4.2) ─────────────
377
+
378
+ export type ArbitrationTrigger = 'DISAGREEMENT' | 'BORDERLINE_SCORE' | 'ALL_CONDITIONAL' | 'NONE';
379
+
380
/**
 * Decide whether arbitration should run and report the reason.
 *
 * Checks are evaluated in order and the first hit wins:
 *  1. DISAGREEMENT     — the votes are not unanimous.
 *  2. BORDERLINE_SCORE — the weighted score is at most 0.10 below the threshold.
 *  3. ALL_CONDITIONAL  — every vote is CONDITIONAL, the mean confidence is at
 *                        least 0.94, and at most 3 required changes were
 *                        requested in total.
 * Otherwise the result is NONE.
 *
 * @param votes - Reviewer votes to inspect.
 * @param weightedScore - Weighted consensus score for those votes.
 * @param threshold - Consensus threshold the score is compared against.
 */
export function getArbitrationTrigger(
  votes: ReviewerVote[],
  weightedScore: number,
  threshold: number,
): ArbitrationTrigger {
  if (hasVoteDisagreement(votes)) {
    return 'DISAGREEMENT';
  }

  const borderlineFloor = threshold - 0.10;
  if (weightedScore >= borderlineFloor) {
    return 'BORDERLINE_SCORE';
  }

  let confidenceSum = 0;
  let requiredChangeCount = 0;
  let everyVoteConditional = true;
  for (const entry of votes) {
    confidenceSum += entry.confidence;
    requiredChangeCount += entry.required_changes?.length ?? 0;
    if (entry.vote !== 'CONDITIONAL') {
      everyVoteConditional = false;
    }
  }
  // With no votes this is NaN, which fails the comparison below.
  const meanConfidence = confidenceSum / votes.length;

  const qualifies = everyVoteConditional && meanConfidence >= 0.94 && requiredChangeCount <= 3;
  return qualifies ? 'ALL_CONDITIONAL' : 'NONE';
}
400
+
50
401
  // ─── Consensus Runner ────────────────────────────────────
51
402
 
52
403
  export class ConsensusRunner {
53
404
  private readonly config: ConsensusRunnerConfig;
405
+ private arbitrationAttempted = new Set<string>();
54
406
 
55
407
  constructor(config: ConsensusRunnerConfig) {
56
408
  this.config = config;
@@ -60,6 +412,7 @@ export class ConsensusRunner {
60
412
  async runStructuredConsensus(
61
413
  planPacket: PlanPacket,
62
414
  gateDefinition: GateDefinition,
415
+ options?: { revisionDirective?: string },
63
416
  ): Promise<ConsensusPacket> {
64
417
  const rules: ConsensusRules = {
65
418
  threshold: gateDefinition.consensusThreshold ?? this.config.threshold,
@@ -67,15 +420,58 @@ export class ConsensusRunner {
67
420
  min_reviewers: gateDefinition.minReviewers ?? this.config.minReviewers,
68
421
  };
69
422
 
423
+ // v2.4.4: Dev-time warning when version is missing or stuck at 1
424
+ if (planPacket.metadata.version === undefined || planPacket.metadata.version <= 1) {
425
+ logger.warn(
426
+ `[consensus] Phase ${planPacket.metadata.phase}: version=${planPacket.metadata.version ?? 'undefined'} ` +
427
+ `— ensure this is intentional (not a missing recoveryCount passthrough)`,
428
+ );
429
+ }
430
+
431
+ // v2.2.1: Record REVIEWER skill usage if loader available
432
+ if (this.config.skillLoader && this.config.skillUsageRegistry) {
433
+ const { meta } = this.config.skillLoader.loadSkillWithMeta('REVIEWER');
434
+ this.config.skillUsageRegistry.record(
435
+ 'REVIEWER',
436
+ planPacket.metadata.phase,
437
+ 'review_prompt',
438
+ meta.source,
439
+ meta.version,
440
+ );
441
+ }
442
+
443
+ // Load actual plan content from disk for inclusion in review prompt
444
+ const { content: planContent } = loadPlanContent(
445
+ this.config.projectDir,
446
+ planPacket.references.master_plan?.path,
447
+ );
448
+
449
+ const revisionDirective = options?.revisionDirective;
70
450
  let votes: ReviewerVote[];
71
451
 
72
452
  if (this.config.mode === 'independent') {
73
- votes = await this.runIndependentReview(planPacket);
453
+ votes = await this.runIndependentReview(planPacket, planContent, revisionDirective);
74
454
  } else {
75
- votes = await this.runIterativeReview(planPacket);
455
+ votes = await this.runIterativeReview(planPacket, planContent, revisionDirective);
456
+ }
457
+
458
+ // v2.1: Normalize votes before scoring
459
+ const { votes: normalizedVotes, summary: normSummary } = normalizeVoteBlockers(votes);
460
+
461
+ if (normSummary.forced_rejects > 0) {
462
+ logger.warn(
463
+ `[consensus] Normalization forced ${normSummary.forced_rejects} vote(s) to REJECT due to blocker/pattern contradiction`,
464
+ );
76
465
  }
77
466
 
78
- // Build consensus packet from votes
467
+ logger.log(`[consensus] Normalization: ${JSON.stringify(normSummary)}`);
468
+ for (const v of normalizedVotes) {
469
+ logger.log(
470
+ `[consensus] Normalized: ${v.reviewer_id} vote=${v.vote} conf=${v.confidence.toFixed(3)} blockers=${v.blocking_issues.length}`,
471
+ );
472
+ }
473
+
474
+ // Build consensus packet from normalized votes
79
475
  const packet = buildConsensusPacket({
80
476
  planPacketRef: {
81
477
  artifact_id: planPacket.metadata.packet_id,
@@ -84,26 +480,132 @@ export class ConsensusRunner {
84
480
  version: planPacket.metadata.version,
85
481
  type: 'consensus',
86
482
  },
87
- votes,
483
+ votes: normalizedVotes,
88
484
  rules,
485
+ normalizationMoves: normSummary,
89
486
  });
90
487
 
488
+ logger.log(
489
+ `[consensus] Result: weighted_score=${packet.consensus_result.weighted_score.toFixed(3)} score=${packet.consensus_result.score.toFixed(3)} status=${packet.final_status}`,
490
+ );
491
+
492
+ // v2.4.2: Attempt arbitration for REJECTED packets if enabled
493
+ if (
494
+ packet.final_status === 'REJECTED'
495
+ && this.config.enableArbitration
496
+ && !this.arbitrationAttempted.has(`${planPacket.metadata.phase}@v${planPacket.metadata.version}`)
497
+ ) {
498
+ const arbitrationTrigger = getArbitrationTrigger(
499
+ normalizedVotes, packet.consensus_result.weighted_score, rules.threshold,
500
+ );
501
+ const shouldArbitrate = arbitrationTrigger !== 'NONE';
502
+
503
+ if (shouldArbitrate) {
504
+ logger.log(
505
+ `[consensus] Arbitration triggered: reason=${arbitrationTrigger} weighted_score=${packet.consensus_result.weighted_score.toFixed(3)}`,
506
+ );
507
+ this.arbitrationAttempted.add(`${planPacket.metadata.phase}@v${planPacket.metadata.version}`);
508
+ const arbResult = await this.callArbitrator(planPacket, normalizedVotes, rules, planContent);
509
+ if (arbResult?.approved) {
510
+ // v2.2.1: Record ARBITRATOR skill usage
511
+ if (this.config.skillLoader && this.config.skillUsageRegistry) {
512
+ const { meta } = this.config.skillLoader.loadSkillWithMeta('ARBITRATOR');
513
+ this.config.skillUsageRegistry.record(
514
+ 'ARBITRATOR',
515
+ planPacket.metadata.phase,
516
+ 'arbitration_prompt',
517
+ meta.source,
518
+ meta.version,
519
+ );
520
+ }
521
+
522
+ // Rebuild with arbitration
523
+ return buildConsensusPacket({
524
+ planPacketRef: {
525
+ artifact_id: planPacket.metadata.packet_id,
526
+ path: '',
527
+ sha256: '',
528
+ version: planPacket.metadata.version,
529
+ type: 'consensus',
530
+ },
531
+ votes: normalizedVotes,
532
+ rules,
533
+ arbitratorResult: {
534
+ decision: arbResult.reasoning,
535
+ merged_patch: arbResult.suggestedChanges?.join('\n'),
536
+ },
537
+ normalizationMoves: normSummary,
538
+ });
539
+ }
540
+ }
541
+ }
542
+
543
+ // v2.2.1: Record ARBITRATOR skill usage if arbitration occurred (legacy path)
544
+ if (packet.final_status === 'ARBITRATED' && this.config.skillLoader && this.config.skillUsageRegistry) {
545
+ const { meta } = this.config.skillLoader.loadSkillWithMeta('ARBITRATOR');
546
+ this.config.skillUsageRegistry.record(
547
+ 'ARBITRATOR',
548
+ planPacket.metadata.phase,
549
+ 'arbitration_prompt',
550
+ meta.source,
551
+ meta.version,
552
+ );
553
+ }
554
+
555
+ // v2.4.2: Diagnostic logging at high version counts
556
+ if (planPacket.metadata.version >= 3) {
557
+ logger.warn(
558
+ `[consensus] High iteration count: phase=${planPacket.metadata.phase} version=${planPacket.metadata.version} `
559
+ + `weighted_score=${packet.consensus_result.weighted_score.toFixed(3)} `
560
+ + `has_true_blockers=${packet.consensus_result.has_true_blockers} `
561
+ + `status=${packet.final_status}`,
562
+ );
563
+ }
564
+
91
565
  return packet;
92
566
  }
93
567
 
94
568
  /** Independent review: spawn N reviewers, each reviews independently */
95
- async runIndependentReview(planPacket: PlanPacket): Promise<ReviewerVote[]> {
96
- const providers = this.config.reviewerProviders ?? DEFAULT_PROVIDERS;
569
+ async runIndependentReview(planPacket: PlanPacket, planContent: string, revisionDirective?: string): Promise<ReviewerVote[]> {
570
+ let providers = [...(this.config.reviewerProviders ?? DEFAULT_PROVIDERS)];
571
+
572
+ // v2.4.2: Escalation — add tie-breaking reviewer on high iteration count.
573
+ // Only select from configured providers (arbitrator config is a valid source).
574
+ if (planPacket.metadata.version >= 3 && providers.length < 3) {
575
+ const existingNames = new Set(providers.map(p => p.provider));
576
+
577
+ // Build candidates from: arbitrator provider + all configured reviewers (deduplicated)
578
+ const candidates = new Set<string>();
579
+ if (this.config.arbitratorProvider) candidates.add(this.config.arbitratorProvider.provider);
580
+ for (const p of this.config.reviewerProviders ?? DEFAULT_PROVIDERS) candidates.add(p.provider);
581
+
582
+ // Pick first configured provider not already reviewing
583
+ const PREFERRED_ORDER = ['grok', 'openai', 'gemini'];
584
+ const tieBreaker = PREFERRED_ORDER.find(p => candidates.has(p) && !existingNames.has(p));
585
+
586
+ if (tieBreaker) {
587
+ const model = getModelForProvider(this.config.consensusConfig, tieBreaker);
588
+ providers.push({ provider: tieBreaker, model, temperature: 0.3 });
589
+ logger.log(
590
+ `[consensus] Escalation: added ${tieBreaker}/${model} as tie-breaking reviewer (v${planPacket.metadata.version})`,
591
+ );
592
+ } else {
593
+ logger.warn(
594
+ `[consensus] Escalation: no additional provider available. ` +
595
+ `configured=${[...candidates].join(',')} ` +
596
+ `in_use=${[...existingNames].join(',')}`,
597
+ );
598
+ }
599
+ }
600
+
97
601
  const numReviewers = Math.max(
98
602
  this.config.minReviewers,
99
603
  providers.length,
100
604
  );
101
605
 
102
- // Build the review prompt from the plan packet
103
- const prompt = buildReviewPrompt(planPacket);
606
+ const prompt = buildReviewPrompt(planPacket, planContent, revisionDirective);
104
607
  const promptHash = createHash('sha256').update(prompt).digest('hex');
105
608
 
106
- // Spawn reviewers in parallel
107
609
  const reviewPromises: Promise<ReviewerVote>[] = [];
108
610
  for (let i = 0; i < numReviewers; i++) {
109
611
  const provider = providers[i % providers.length];
@@ -121,8 +623,8 @@ export class ConsensusRunner {
121
623
  }
122
624
 
123
625
  /** Iterative review: wraps existing iterateUntilConsensus */
124
- async runIterativeReview(planPacket: PlanPacket): Promise<ReviewerVote[]> {
125
- const prompt = buildReviewPrompt(planPacket);
626
+ async runIterativeReview(planPacket: PlanPacket, planContent: string, revisionDirective?: string): Promise<ReviewerVote[]> {
627
+ const prompt = buildReviewPrompt(planPacket, planContent, revisionDirective);
126
628
 
127
629
  try {
128
630
  const result = await iterateUntilConsensus(
@@ -134,15 +636,15 @@ export class ConsensusRunner {
134
636
  },
135
637
  );
136
638
 
137
- // Convert legacy result to ReviewerVote format
639
+ const iterativeConfidence = (result.finalScore ?? 50) / 100;
138
640
  const vote: ReviewerVote = {
139
641
  reviewer_id: 'iterative-reviewer',
140
642
  provider: 'openai',
141
643
  model: this.config.consensusConfig?.openaiModel ?? 'gpt-4.1',
142
644
  temperature: this.config.consensusConfig?.temperature ?? 0.3,
143
645
  prompt_hash: createHash('sha256').update(prompt).digest('hex'),
144
- vote: result.approved ? 'APPROVE' : 'REJECT',
145
- confidence: result.finalScore ?? 0.5,
646
+ vote: mapVote(iterativeConfidence, this.config.threshold),
647
+ confidence: iterativeConfidence,
146
648
  blocking_issues: result.finalConcerns ?? [],
147
649
  suggestions: result.finalRecommendations ?? [],
148
650
  evidence_refs: [],
@@ -165,7 +667,11 @@ export class ConsensusRunner {
165
667
  }
166
668
  }
167
669
 
168
- /** Spawn a single independent reviewer */
670
+ /**
671
+ * Spawn a single independent reviewer.
672
+ * Governance rule: vote is ALWAYS derived from confidence via mapVote().
673
+ * The LLM's explicit vote is advisory only — logged for debugging.
674
+ */
169
675
  private async spawnSingleReviewer(
170
676
  prompt: string,
171
677
  promptHash: string,
@@ -175,17 +681,34 @@ export class ConsensusRunner {
175
681
  try {
176
682
  const result = await this.callProviderForReview(prompt, provider);
177
683
 
684
+ // Governance: always derive vote from confidence, never trust LLM's explicit vote
685
+ const derived = mapVote(result.confidence, this.config.threshold);
686
+ const modelVote = result.modelVote ?? null;
687
+ const reviewer_inconsistency = modelVote !== null && modelVote !== derived;
688
+
689
+ if (reviewer_inconsistency) {
690
+ logger.log(
691
+ `[consensus] ${provider.provider}: model said ${modelVote} but confidence ${result.confidence.toFixed(3)} -> derived ${derived}`,
692
+ );
693
+ }
694
+
695
+ logger.log(
696
+ `[consensus] ${provider.provider}/${provider.model}: vote=${derived} confidence=${result.confidence.toFixed(3)} modelVote=${modelVote ?? 'none'} blockers=${result.blockingIssues.length}`,
697
+ );
698
+
178
699
  return {
179
700
  reviewer_id: reviewerId,
180
701
  provider: provider.provider,
181
702
  model: provider.model,
182
703
  temperature: provider.temperature,
183
704
  prompt_hash: promptHash,
184
- vote: result.approved ? 'APPROVE' : 'REJECT',
705
+ vote: derived,
185
706
  confidence: result.confidence,
186
707
  blocking_issues: result.blockingIssues,
708
+ required_changes: result.requiredChanges ?? [],
187
709
  suggestions: result.suggestions,
188
710
  evidence_refs: [],
711
+ reviewer_inconsistency,
189
712
  };
190
713
  } catch {
191
714
  return {
@@ -203,63 +726,423 @@ export class ConsensusRunner {
203
726
  }
204
727
  }
205
728
 
206
- /** Call the appropriate provider adapter for a review */
729
/**
 * Send the review prompt to the adapter that matches the provider and parse
 * the reply.
 *
 * The adapter's requestRawReview() is used so the runner keeps ownership of
 * the prompt and parses the raw LLM text itself. Adapters are imported
 * lazily, so only the selected provider's module is loaded.
 *
 * @param prompt - Fully built review prompt.
 * @param provider - Provider, model, and temperature to review with.
 * @returns The parsed review result.
 * @throws Error when the provider name is not one of openai, gemini, grok.
 */
private async callProviderForReview(
  prompt: string,
  provider: ReviewerProviderConfig,
): Promise<ProviderReviewResult> {
  const fetchRaw = async (): Promise<string> => {
    if (provider.provider === 'openai') {
      const { requestRawReview } = await import('../../adapters/openai.js');
      return requestRawReview(prompt, {
        openaiModel: provider.model,
        temperature: provider.temperature,
      } as Partial<ConsensusConfig>);
    }
    if (provider.provider === 'gemini') {
      const { requestRawReview } = await import('../../adapters/gemini.js');
      return requestRawReview(prompt, {
        model: provider.model,
        temperature: provider.temperature,
      });
    }
    if (provider.provider === 'grok') {
      const { requestRawReview } = await import('../../adapters/grok.js');
      return requestRawReview(prompt, {
        model: provider.model,
        temperature: provider.temperature,
      });
    }
    throw new Error(`Unknown provider: ${provider.provider}`);
  };

  const raw = await fetchRaw();

  // Only the first 500 characters of the reply are logged.
  logger.log(`[consensus] raw(${provider.provider}/${provider.model}): ${raw.slice(0, 500)}`);
  return parseRawReviewResponse(raw);
}
772
+
773
/**
 * Ask the arbitrator provider for a tie-breaking decision.
 *
 * If the configured arbitrator's provider also cast a REJECT vote, the call
 * is rotated to the first provider in the fallback order (openai, grok,
 * gemini) that is configured and did not reject. When none exists, the
 * configured arbitrator is kept.
 *
 * @param planPacket - Plan under review.
 * @param votes - Reviewer votes passed to the arbitration prompt.
 * @param _rules - Consensus rules (currently unused).
 * @param planContent - Plan text to include in the prompt, if available.
 * @returns The arbitration result, or null when no arbitrator is configured,
 *          the provider returned nothing, or the call failed.
 */
private async callArbitrator(
  planPacket: PlanPacket,
  votes: ReviewerVote[],
  _rules: ConsensusRules,
  planContent?: string,
): Promise<ArbitrationResult | null> {
  const configured = this.config.arbitratorProvider;
  if (!configured) return null;

  let arbitrator = configured;

  // Providers that rejected as reviewers should not also arbitrate.
  const rejecting = new Set(
    votes.flatMap((v) => (v.vote === 'REJECT' ? [v.provider] : [])),
  );

  if (rejecting.has(arbitrator.provider)) {
    const available = new Set(
      (this.config.reviewerProviders ?? DEFAULT_PROVIDERS).map((p) => p.provider),
    );
    available.add(configured.provider);

    const fallbackOrder = ['openai', 'grok', 'gemini'];
    const replacement = fallbackOrder.find(
      (name) => available.has(name) && !rejecting.has(name),
    );

    if (!replacement || replacement === arbitrator.provider) {
      logger.warn(
        `[consensus] Arbitrator rotation: no configured non-dissenter provider available, keeping ${arbitrator.provider}`,
      );
    } else {
      const model = getModelForProvider(this.config.consensusConfig, replacement);
      logger.log(
        `[consensus] Arbitrator rotation: ${arbitrator.provider} is a dissenter, switching to ${replacement}/${model}`,
      );
      arbitrator = { provider: replacement, model, temperature: 0.2 };
    }
  }

  try {
    const prompt = buildArbitrationPrompt(planPacket, votes, planContent);

    const raw = await queryProvider(prompt, arbitrator);
    if (!raw) return null;

    logger.log(`[consensus] arbitrator raw(${arbitrator.provider}/${arbitrator.model}): ${raw.slice(0, 500)}`);

    // Arbitrator replies have their own schema, separate from reviewer replies.
    const { approved, reasoning, suggestedChanges } = parseArbitratorResponse(raw);

    logger.log(
      `[consensus] Arbitrator decision: approved=${approved} ` +
      `suggestedChanges=${suggestedChanges.length}`,
    );

    const excerpt = raw.slice(0, 2000);
    return {
      approved,
      // Fixed score derived from the boolean decision.
      score: approved ? 90 : 10,
      analysis: excerpt,
      criticalConcerns: [],
      minorConcerns: [],
      subjectiveConcerns: [],
      reasoning: reasoning || excerpt,
      suggestedChanges,
      rawResponse: raw,
    };
  } catch (err) {
    logger.warn(`[consensus] Arbitration call failed: ${err instanceof Error ? err.message : 'unknown'}`);
    return null;
  }
}
240
847
  }
241
848
 
242
849
  // ─── Helper Types ────────────────────────────────────────
243
850
 
244
- interface ProviderReviewResult {
245
- approved: boolean;
851
+ export interface ProviderReviewResult {
246
852
  confidence: number;
247
853
  blockingIssues: string[];
248
854
  suggestions: string[];
855
+ requiredChanges?: string[];
856
+ /** LLM's explicit vote — advisory only, never used for gate decisions */
857
+ modelVote?: 'APPROVE' | 'REJECT' | 'CONDITIONAL' | null;
858
+ }
859
+
860
+ // ─── JSON-first Response Parsing ─────────────────────────
861
+
862
+ /**
863
+ * Zod schema for structured JSON review responses from the LLM.
864
+ */
865
+ const ReviewResponseSchema = z.object({
866
+ vote: z.enum(['APPROVE', 'CONDITIONAL', 'REJECT']),
867
+ confidence: z.number().min(0).max(1),
868
+ blocking_issues: z.array(z.string()).default([]),
869
+ required_changes: z.array(z.string()).default([]),
870
+ suggestions: z.array(z.string()).default([]),
871
+ analysis: z.string().optional(),
872
+ });
873
+
874
/**
 * Turn raw LLM review text into a ProviderReviewResult.
 *
 * Structured JSON is tried first; if that yields nothing, the free-form
 * regex parser is used. The confidence is then passed through
 * correctConfidenceContradiction() with the model's own vote, and any
 * correction is logged as a warning.
 *
 * @param raw - Raw text from the LLM
 * @returns Parsed review result carrying the (possibly corrected) confidence
 */
export function parseRawReviewResponse(raw: string): ProviderReviewResult {
  const parsed = tryParseJSON(raw) ?? parseRegexFallback(raw);

  const correction = correctConfidenceContradiction(
    parsed.modelVote ?? null,
    parsed.confidence,
  );

  if (correction.wasContradiction) {
    logger.warn(
      `[consensus] Confidence contradiction corrected: vote=${parsed.modelVote} `
      + `conf=${correction.original.toFixed(3)} -> corrected=${correction.confidence.toFixed(3)}`,
    );
  }

  return { ...parsed, confidence: correction.confidence };
}
250
901
 
251
- function parseConsensusResult(result: ConsensusResult): ProviderReviewResult {
902
/**
 * Attempt to parse a structured JSON review response.
 *
 * Candidates are tried in order until one both parses and validates against
 * ReviewResponseSchema:
 *  1. the contents of the first markdown code fence, or the whole response
 *     when there is no fence;
 *  2. the outermost `{ ... }` span inside that text, which recovers a JSON
 *     object the model wrapped in prose without fences;
 *  3. when a fence was present, the outermost `{ ... }` span of the whole
 *     response, in case the fence held something other than the review JSON.
 *
 * @param raw - Raw text from the LLM
 * @returns The parsed review, or null when no candidate is valid
 */
function tryParseJSON(raw: string): ProviderReviewResult | null {
  const fenceMatch = raw.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
  const primary = (fenceMatch ? fenceMatch[1] : raw).trim();
  if (!primary) return null;

  const candidates: string[] = [primary];
  for (const text of fenceMatch ? [primary, raw] : [primary]) {
    const open = text.indexOf('{');
    const close = text.lastIndexOf('}');
    if (open !== -1 && close > open) {
      const embedded = text.slice(open, close + 1);
      if (!candidates.includes(embedded)) candidates.push(embedded);
    }
  }

  for (const candidate of candidates) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      continue; // not JSON — try the next candidate
    }

    const validated = ReviewResponseSchema.safeParse(parsed);
    if (!validated.success) continue;

    const d = validated.data;
    return {
      confidence: d.confidence,
      blockingIssues: d.blocking_issues,
      suggestions: d.suggestions,
      requiredChanges: d.required_changes,
      modelVote: d.vote,
    };
  }

  return null;
}
927
+
928
/**
 * Regex fallback parser for free-form text responses.
 * Extracts the advisory vote, a confidence value, and issue lists from
 * unstructured text.
 *
 * Confidence extraction tries, in order: a `confidence: N` style key (quotes
 * optional), a "confidence score N" phrase, an `N/1` or `N/1.0` fraction, and
 * finally the legacy `CONSENSUS: NN%` format. Values above 1 are treated as
 * percentages. The result is clamped to [0, 1] and is 0 when nothing matched.
 *
 * @param raw - Raw text from the LLM
 * @returns Parsed review result; `modelVote` is null when no vote word is found
 */
function parseRegexFallback(raw: string): ProviderReviewResult {
  // Extract vote (advisory only): an explicit "VOTE:" label wins over a bare keyword.
  const voteMatch = raw.match(/\bVOTE:\s*(APPROVE|REJECT|CONDITIONAL)\b/i)
    || raw.match(/\b(APPROVE|REJECT|CONDITIONAL)\b/i);
  const modelVote = voteMatch
    ? voteMatch[1].toUpperCase() as 'APPROVE' | 'REJECT' | 'CONDITIONAL'
    : null;

  // Extract confidence (0-1 scale) — try multiple patterns.
  // JSON keys have quotes ("confidence": 0.88), so patterns allow optional quotes.
  let confidence = 0;
  const confPatterns = [
    /"?CONFIDENCE"?\s*:\s*(\d+\.?\d*)/i,
    /"?[Cc]onfidence"?\s*(?:score)?[:\s]+(\d+\.?\d*)/,
    // The denominator must be exactly 1 or 1.0: the lookahead stops "8/10"
    // or "3/12" from being read as "8/1" or "3/1".
    /(\d+\.?\d*)\s*\/\s*1(?:\.0)?(?!\.?\d)/,
  ];
  for (const pattern of confPatterns) {
    const match = raw.match(pattern);
    if (match) {
      const val = parseFloat(match[1]);
      confidence = val > 1 ? val / 100 : val;
      break;
    }
  }
  // Fallback: CONSENSUS: XX% format (legacy adapter format)
  if (confidence === 0) {
    const consensusMatch = raw.match(/CONSENSUS:\s*(\d+)%/i);
    if (consensusMatch) confidence = parseInt(consensusMatch[1], 10) / 100;
  }

  // Extract issues — combine [TAG]-prefixed items with bullet lists under section headings.
  const blockingIssues = extractTaggedList(raw, 'BLOCKER')
    .concat(extractSectionList(raw, 'BLOCKING.?ISSUES'));
  const requiredChanges = extractTaggedList(raw, 'REQUIRED')
    .concat(extractSectionList(raw, 'REQUIRED.?CHANGES'));
  const suggestions = extractTaggedList(raw, 'SUGGESTION')
    .concat(extractSectionList(raw, 'SUGGESTIONS', 'CONCERNS', 'RECOMMENDATIONS'));

  return {
    confidence: Math.max(0, Math.min(1, confidence)),
    blockingIssues: dedup(blockingIssues),
    suggestions: dedup(suggestions),
    requiredChanges: dedup(requiredChanges),
    modelVote,
  };
}
259
978
 
979
/**
 * Extract items prefixed with [TAG] from raw text (tag match is case-insensitive).
 * E.g. "[BLOCKER] SQL injection vulnerability" → "SQL injection vulnerability"
 *
 * The item text must be on the same line as its tag: a bare "[TAG]" line does
 * not swallow the following line, and whitespace-only remainders are skipped.
 * The tag is matched literally (regex metacharacters are escaped).
 *
 * @param raw - Text to scan
 * @param tag - Tag name without the surrounding brackets
 * @returns Trimmed item texts in order of appearance
 */
function extractTaggedList(raw: string, tag: string): string[] {
  const literalTag = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // [^\S\r\n] is "whitespace except line breaks", keeping the match on one line.
  const pattern = new RegExp(`\\[${literalTag}\\][^\\S\\r\\n]*:?[^\\S\\r\\n]*(.+)`, 'gi');

  const items: string[] = [];
  for (const match of raw.matchAll(pattern)) {
    const text = (match[1] ?? '').trim();
    if (text.length > 0) items.push(text);
  }
  return items;
}
990
+
991
/**
 * Pull the bullet items listed under a named section heading.
 *
 * Each entry of `patterns` is a regex fragment for a heading (e.g.
 * "BLOCKING.?ISSUES" covers "BLOCKING ISSUES:" and "BLOCKING_ISSUES:").
 * Headings are tried in order and only the first one found is used. Its body
 * runs until the next "HEADING:" / "##" line or the end of the text. Leading
 * "-", "*" and "1." markers are stripped; blank lines and "none" are dropped.
 *
 * @param raw - Text to scan
 * @param patterns - Heading regex fragments, in priority order
 * @returns Items of the first matching section, or [] when no heading matches
 */
function extractSectionList(raw: string, ...patterns: string[]): string[] {
  const stripMarkers = (line: string): string =>
    line.replace(/^[\s]*[-*]\s*/, '').replace(/^\d+\.\s*/, '').trim();

  for (const heading of patterns) {
    const section = new RegExp(
      `${heading}[:\\s]*\\n([\\s\\S]*?)(?=\\n(?:[A-Z][A-Z_\\s]+:|##)|$)`,
      'i',
    ).exec(raw);
    if (section === null) continue;

    const items: string[] = [];
    for (const line of section[1].split('\n')) {
      const item = stripMarkers(line);
      if (item.length > 0 && !/^none$/i.test(item)) {
        items.push(item);
      }
    }
    return items;
  }

  return [];
}
1008
+
1009
/**
 * Drop repeated strings, comparing case-insensitively and ignoring
 * surrounding whitespace. The first occurrence is kept unchanged and the
 * original order is preserved.
 */
function dedup(items: string[]): string[] {
  const seenKeys = new Set<string>();
  const unique: string[] = [];

  for (const item of items) {
    const key = item.toLowerCase().trim();
    if (!seenKeys.has(key)) {
      seenKeys.add(key);
      unique.push(item);
    }
  }

  return unique;
}
1021
+
1022
+
1023
+ // ─── Confidence Contradiction Correction ─────────────────
1024
+
1025
/**
 * Reconcile a confidence value with the model's explicit vote.
 *
 * Confidence is meant to score plan quality, with bands REJECT < 0.80,
 * CONDITIONAL 0.80 up to (not including) 0.95, and APPROVE >= 0.95. When a
 * model reports a value outside the band of its own vote, the value is
 * moved back into that band:
 *   - REJECT      with conf >= 0.80         -> min(0.79, 1 - conf)
 *   - CONDITIONAL with conf >= 0.95 or < 0.80 -> 0.87
 *   - APPROVE     with conf < 0.80          -> max(0.95, 1 - conf)
 *   - APPROVE     with conf in [0.80, 0.95) -> 0.95
 *
 * With a null vote nothing is corrected and the raw value is returned
 * unclamped; otherwise the value is clamped to [0, 1] before the checks.
 *
 * @param modelVote - The model's explicit vote, or null if none was found
 * @param rawConfidence - Confidence as reported by the model
 * @returns The confidence to use, whether it was corrected, and the value
 *          it was derived from
 */
export function correctConfidenceContradiction(
  modelVote: 'APPROVE' | 'REJECT' | 'CONDITIONAL' | null,
  rawConfidence: number,
): { confidence: number; wasContradiction: boolean; original: number } {
  if (modelVote === null) {
    return { confidence: rawConfidence, wasContradiction: false, original: rawConfidence };
  }

  const quality = Math.max(0, Math.min(1, rawConfidence));
  const corrected = (value: number) => ({
    confidence: value,
    wasContradiction: true,
    original: quality,
  });

  switch (modelVote) {
    case 'REJECT':
      // A high value on a REJECT is read as "certainty", so it is inverted.
      if (quality >= 0.80) return corrected(Math.min(0.79, 1.0 - quality));
      break;
    case 'CONDITIONAL':
      if (quality >= 0.95 || quality < 0.80) return corrected(0.87);
      break;
    case 'APPROVE':
      // Inverted like REJECT, but never below the bottom of the APPROVE band.
      if (quality < 0.80) return corrected(Math.max(0.95, 1.0 - quality));
      if (quality < 0.95) return corrected(0.95);
      break;
  }

  return { confidence: quality, wasContradiction: false, original: quality };
}
1078
+
1079
+ // ─── Arbitrator Response Parser (v2.4.3) ─────────────────
1080
+
1081
+ /**
1082
+ * Zod schema for arbitrator JSON responses.
1083
+ * Accepts both camelCase and snake_case for suggestedChanges.
1084
+ */
1085
+ const ArbitratorResponseSchema = z.object({
1086
+ approved: z.boolean(),
1087
+ reasoning: z.string().optional(),
1088
+ suggestedChanges: z.array(z.string()).default([]),
1089
+ suggested_changes: z.array(z.string()).default([]),
1090
+ });
1091
+
1092
/**
 * Parse raw arbitrator response into a structured result.
 * Strategy 1: JSON parse (optionally wrapped in code fences), validated
 *             against ArbitratorResponseSchema.
 * Strategy 2: Regex fallback for free-form or malformed-JSON text.
 *
 * The fallback fails closed, because a wrong "approved" overrides a
 * rejected gate:
 *  - an explicit `approved: true|false` / `"approved": false` pair wins
 *    (quotes around the key or value are tolerated);
 *  - otherwise the text counts as approval only if it contains an approval
 *    word, no rejection word, and no negated approval ("cannot be approved").
 *
 * @param raw - Raw text from the arbitrator LLM
 * @returns Parsed result with approved boolean, reasoning, and suggested changes
 */
export function parseArbitratorResponse(raw: string): {
  approved: boolean;
  reasoning: string;
  suggestedChanges: string[];
} {
  // Strategy 1: JSON parse (with optional code fence wrapping)
  const jsonMatch = raw.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
  const candidate = (jsonMatch ? jsonMatch[1] : raw).trim();
  try {
    const parsed: unknown = JSON.parse(candidate);
    const validated = ArbitratorResponseSchema.safeParse(parsed);
    if (validated.success) {
      const data = validated.data;
      return {
        approved: data.approved,
        reasoning: data.reasoning ?? '',
        // Both spellings are accepted; merge whichever the model used.
        suggestedChanges: [
          ...(data.suggestedChanges ?? []),
          ...(data.suggested_changes ?? []),
        ],
      };
    }
  } catch { /* fall through to regex */ }

  // Strategy 2: Regex fallback for free-form text
  let approved: boolean;
  const explicit = raw.match(/["']?approved["']?\s*[:=]\s*["']?(true|false)\b/i);
  if (explicit) {
    approved = explicit[1].toLowerCase() === 'true';
  } else {
    const mentionsApproval = /\b(?:approve|approved|accept|accepted)\b/i.test(raw);
    const mentionsRejection = /\b(?:reject|rejected|deny|denied)\b/i.test(raw);
    const negatedApproval =
      /\b(?:not|never|cannot|can't|won't|don't|doesn't|unable\s+to)\s+(?:be\s+)?(?:approve|approved|accept|accepted)\b/i
        .test(raw);
    approved = mentionsApproval && !mentionsRejection && !negatedApproval;
  }

  // Every bulleted or numbered line is treated as a suggested change.
  const changes: string[] = [];
  const changeMatches = raw.matchAll(/(?:^|\n)\s*[-*\d.]+\s+(.+)/g);
  for (const m of changeMatches) changes.push(m[1].trim());

  return { approved, reasoning: raw.slice(0, 2000), suggestedChanges: changes };
}
1142
+
260
1143
  // ─── Prompt Builder ──────────────────────────────────────
261
1144
 
262
- export function buildReviewPrompt(planPacket: PlanPacket): string {
1145
+ export function buildReviewPrompt(planPacket: PlanPacket, planContent?: string, revisionDirective?: string): string {
263
1146
  const lines: string[] = [
264
1147
  `# Independent Plan Review`,
265
1148
  ``,
@@ -275,6 +1158,18 @@ export function buildReviewPrompt(planPacket: PlanPacket): string {
275
1158
  ``,
276
1159
  ];
277
1160
 
1161
+ // Render plan content (loaded from disk by caller)
1162
+ if (planContent && planContent.trim().length > 0) {
1163
+ lines.push(`## Plan Content`, ``, planContent, ``);
1164
+ } else {
1165
+ lines.push(
1166
+ `## Plan Content`,
1167
+ ``,
1168
+ `[WARNING: Plan content could not be loaded. Review based on metadata only.]`,
1169
+ ``,
1170
+ );
1171
+ }
1172
+
278
1173
  if (planPacket.open_questions?.length) {
279
1174
  lines.push(`## Open Questions`);
280
1175
  lines.push(...planPacket.open_questions.map((q) => `- ${q}`));
@@ -290,22 +1185,180 @@ export function buildReviewPrompt(planPacket: PlanPacket): string {
290
1185
  `3. Feasibility — can this be implemented as described?`,
291
1186
  `4. Constitution compliance — does it follow governance rules?`,
292
1187
  ``,
293
- `Respond with:`,
294
- `- APPROVE, REJECT, or CONDITIONAL`,
295
- `- Confidence score (0-1)`,
296
- `- Blocking issues (if any)`,
297
- `- Suggestions for improvement`,
1188
+ `## Scoring Guide`,
1189
+ ``,
1190
+ `The "confidence" field represents your assessment of PLAN QUALITY, NOT how certain you are about your review.`,
1191
+ `It answers: "How ready is this plan for execution on a scale of 0.00 to 1.00?"`,
1192
+ ``,
1193
+ `- confidence 0.95-1.00 (vote APPROVE): The plan is EXECUTION-READY as-is.`,
1194
+ `- confidence 0.80-0.94 (vote CONDITIONAL): The plan needs specific changes before execution.`,
1195
+ `- confidence below 0.80 (vote REJECT): The plan has fundamental issues.`,
1196
+ ``,
1197
+ `CRITICAL: Your vote and confidence MUST be consistent:`,
1198
+ ` - REJECT requires confidence below 0.80`,
1199
+ ` - CONDITIONAL requires confidence between 0.80 and 0.94`,
1200
+ ` - APPROVE requires confidence 0.95 or above`,
1201
+ `Do NOT use confidence to express how certain you are of your assessment.`,
1202
+ `A REJECT with confidence 0.99 is INVALID -- it implies the plan is 99% ready while rejecting it.`,
1203
+ `Mismatched vote+confidence will be auto-corrected by the system.`,
1204
+ ``,
1205
+ `IMPORTANT: "Execution-ready" means a competent developer could implement this plan successfully, not that the plan is theoretically perfect. Reserve CONDITIONAL for changes that would cause implementation to fail or produce incorrect results, not style preferences.`,
1206
+ ``,
1207
+ `## Output Format for Issues`,
1208
+ `- Prefix blocking issues with [BLOCKER]: items that MUST be fixed before approval`,
1209
+ `- Prefix required changes with [REQUIRED]: items that need changes but are not deal-breakers`,
1210
+ `- Prefix suggestions with [SUGGESTION]: nice-to-have improvements`,
1211
+ ``,
1212
+ `IMPORTANT: If your vote is APPROVE or CONDITIONAL, do NOT list [BLOCKER] items.`,
1213
+ `[BLOCKER] items are only valid with a REJECT vote.`,
1214
+ ``,
1215
+ `## Response Format`,
1216
+ ``,
1217
+ `Return ONLY a JSON object matching this schema:`,
1218
+ ``,
1219
+ '```json',
1220
+ `{`,
1221
+ ` "vote": "APPROVE" | "CONDITIONAL" | "REJECT",`,
1222
+ ` "confidence": 0.00, // Plan quality score, NOT review certainty`,
1223
+ ` "blocking_issues": ["[BLOCKER] ..."],`,
1224
+ ` "required_changes": ["[REQUIRED] ..."],`,
1225
+ ` "suggestions": ["[SUGGESTION] ..."],`,
1226
+ ` "analysis": "Your detailed analysis here"`,
1227
+ `}`,
1228
+ '```',
1229
+ ``,
1230
+ `### Examples of VALID responses:`,
1231
+ `- APPROVE with confidence 0.97: "Plan is solid, minor style nits only"`,
1232
+ `- CONDITIONAL with confidence 0.85: "Need to add error handling for X"`,
1233
+ `- REJECT with confidence 0.45: "Missing entire auth layer, unclear data model"`,
1234
+ ``,
1235
+ `### Examples of INVALID responses (will be auto-corrected):`,
1236
+ `- REJECT with confidence 0.99: This means "plan is 99% ready" while rejecting it`,
1237
+ `- APPROVE with confidence 0.60: This means "plan has issues" while approving it`,
1238
+ ``,
1239
+ `Confidence = plan quality score (NOT review certainty):`,
1240
+ `- 0.95-1.00: APPROVE range -- plan is execution-ready`,
1241
+ `- 0.80-0.94: CONDITIONAL range -- specific changes needed`,
1242
+ `- Below 0.80: REJECT range -- fundamental issues`,
1243
+ ``,
1244
+ `Your vote and confidence MUST fall in the same range. Mismatches will be auto-corrected.`,
1245
+ ``,
1246
+ `If vote is APPROVE: blocking_issues and required_changes must be empty arrays.`,
1247
+ `If vote is CONDITIONAL: blocking_issues must be empty, use required_changes.`,
1248
+ `If vote is REJECT: use blocking_issues for critical issues.`,
298
1249
  );
299
1250
 
1251
+ // v2.4.2: Add revision notice + prior feedback for plan revisions
1252
+ if (planPacket.metadata.version > 1) {
1253
+ lines.push(
1254
+ ``,
1255
+ `## Revision Notice`,
1256
+ ``,
1257
+ `This is revision ${planPacket.metadata.version} of the plan.`,
1258
+ `Prioritize verifying whether prior issues have been adequately addressed.`,
1259
+ `Also flag any new *critical* issues you discover.`,
1260
+ ``,
1261
+ );
1262
+ }
1263
+
1264
+ if (revisionDirective && revisionDirective.trim().length > 0) {
1265
+ const trimmed = revisionDirective.trim();
1266
+ const capped = trimmed.length > 2000
1267
+ ? trimmed.slice(0, 2000) + '\n\n[TRUNCATED -- full directive exceeds 2000 chars]'
1268
+ : trimmed;
1269
+ lines.push(
1270
+ `## Prior Feedback (Must Address)`,
1271
+ ``,
1272
+ capped,
1273
+ ``,
1274
+ `Confirm each item above is addressed or explain why it is not applicable.`,
1275
+ ``,
1276
+ );
1277
+ }
1278
+
300
1279
  return lines.join('\n');
301
1280
  }
302
1281
 
1282
/**
 * Assemble the markdown prompt sent to the arbitrator after reviewers disagree.
 *
 * The prompt contains the plan's phase and version, the plan text (or a warning
 * placeholder when it is missing or blank), one summary section per reviewer
 * vote, and the arbitration instructions.
 *
 * @param planPacket - Plan packet; only `metadata.phase` and `metadata.version` are read.
 * @param votes - Reviewer votes to summarize, rendered in the order given.
 * @param planContent - Plan text to embed verbatim, if it could be loaded.
 * @returns The prompt as a single newline-joined string.
 */
function buildArbitrationPrompt(planPacket: PlanPacket, votes: ReviewerVote[], planContent?: string): string {
  // One markdown section per reviewer; optional rows appear only when non-empty.
  const reviewerBlocks: string[] = [];
  votes.forEach((vote, index) => {
    const rows: string[] = [];
    rows.push(`### Reviewer ${index + 1} (${vote.provider}/${vote.model})`);
    rows.push(`Vote: ${vote.vote} (confidence: ${vote.confidence.toFixed(2)})`);
    if (vote.blocking_issues.length > 0) {
      rows.push(`Blocking: ${vote.blocking_issues.join('; ')}`);
    }
    if (vote.required_changes?.length) {
      rows.push(`Required changes: ${vote.required_changes.join('; ')}`);
    }
    if (vote.suggestions.length > 0) {
      rows.push(`Suggestions: ${vote.suggestions.join('; ')}`);
    }
    reviewerBlocks.push(rows.join('\n'));
  });

  // Fall back to a warning line when the plan text is absent or whitespace-only.
  const planBody =
    planContent && planContent.trim().length > 0
      ? planContent
      : `[WARNING: Plan content could not be loaded.]`;

  const prompt: string[] = [];
  prompt.push(`# Arbitration Request`, ``);
  prompt.push(
    `## Phase: ${planPacket.metadata.phase}`,
    `## Plan Version: ${planPacket.metadata.version}`,
    ``,
  );
  prompt.push(`## Plan Content`, ``, planBody, ``);
  prompt.push(`## Reviewer Votes`, reviewerBlocks.join('\n\n'), ``);
  prompt.push(
    `## Instructions`,
    `The reviewers above could not reach consensus. As arbitrator:`,
    `1. Analyze the disagreement points`,
    `2. Determine if the plan is execution-ready with minor amendments`,
    `3. If approving, provide specific suggestedChanges that address each required_change`,
    `4. If the issues are fundamental, do NOT approve`,
    ``,
    `Provide your decision as: approved (true/false), reasoning, and suggestedChanges array.`,
  );
  return prompt.join('\n');
}
1327
+
303
1328
  // ─── Factory ─────────────────────────────────────────────
304
1329
 
1330
/**
 * Resolve the model identifier to use for a provider.
 *
 * A model configured for the provider (`openaiModel`, `geminiModel`,
 * `grokModel`) wins; otherwise the provider's built-in default is used.
 * An unrecognized provider always resolves to 'gpt-4.1', whether or not a
 * config object was supplied.
 *
 * @param config - Consensus config, possibly partial or undefined.
 * @param provider - Provider name ('openai', 'gemini' or 'grok').
 * @returns The model identifier string.
 */
export function getModelForProvider(
  config: Partial<ConsensusConfig> | undefined,
  provider: string,
): string {
  // Optional chaining lets a missing config share the same defaults as a
  // config with the field unset, so both paths agree for every provider.
  switch (provider) {
    case 'openai':
      return config?.openaiModel ?? 'gpt-4.1';
    case 'gemini':
      return config?.geminiModel ?? 'gemini-2.5-flash';
    case 'grok':
      return config?.grokModel ?? 'grok-3';
    default:
      return 'gpt-4.1';
  }
}
1345
+
305
1346
  export function createConsensusRunner(
306
1347
  projectDir: string,
307
1348
  consensusConfig?: Partial<ConsensusConfig>,
1349
+ skillLoader?: import('../skill-loader.js').SkillLoader,
1350
+ skillUsageRegistry?: import('../skills/usage-registry.js').SkillUsageRegistry,
308
1351
  ): ConsensusRunner {
1352
+ // Wire arbitration from consensus config
1353
+ const enableArbitration = consensusConfig?.enableArbitration !== false;
1354
+ const arbitratorProvider = enableArbitration
1355
+ ? {
1356
+ provider: consensusConfig?.arbitrator ?? 'gemini',
1357
+ model: getModelForProvider(consensusConfig, consensusConfig?.arbitrator ?? 'gemini'),
1358
+ temperature: 0.2,
1359
+ }
1360
+ : undefined;
1361
+
309
1362
  return new ConsensusRunner({
310
1363
  mode: 'independent',
311
1364
  minReviewers: 2,
@@ -313,5 +1366,9 @@ export function createConsensusRunner(
313
1366
  quorum: 2,
314
1367
  projectDir,
315
1368
  consensusConfig,
1369
+ arbitratorProvider,
1370
+ enableArbitration,
1371
+ skillLoader,
1372
+ skillUsageRegistry,
316
1373
  });
317
1374
  }