popeye-cli 2.2.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/gemini.d.ts +14 -0
- package/dist/adapters/gemini.d.ts.map +1 -1
- package/dist/adapters/gemini.js +41 -6
- package/dist/adapters/gemini.js.map +1 -1
- package/dist/adapters/grok.d.ts +14 -0
- package/dist/adapters/grok.d.ts.map +1 -1
- package/dist/adapters/grok.js +42 -6
- package/dist/adapters/grok.js.map +1 -1
- package/dist/adapters/openai.d.ts +10 -0
- package/dist/adapters/openai.d.ts.map +1 -1
- package/dist/adapters/openai.js +44 -5
- package/dist/adapters/openai.js.map +1 -1
- package/dist/cli/commands/create.js +1 -1
- package/dist/cli/commands/create.js.map +1 -1
- package/dist/cli/interactive.d.ts.map +1 -1
- package/dist/cli/interactive.js +324 -20
- package/dist/cli/interactive.js.map +1 -1
- package/dist/generators/all.d.ts.map +1 -1
- package/dist/generators/all.js +3 -2
- package/dist/generators/all.js.map +1 -1
- package/dist/generators/doc-parser.d.ts +21 -6
- package/dist/generators/doc-parser.d.ts.map +1 -1
- package/dist/generators/doc-parser.js +55 -4
- package/dist/generators/doc-parser.js.map +1 -1
- package/dist/generators/templates/fullstack.js +1 -1
- package/dist/generators/templates/website-components.js +1 -1
- package/dist/generators/templates/website-components.js.map +1 -1
- package/dist/generators/templates/website-config.d.ts +4 -1
- package/dist/generators/templates/website-config.d.ts.map +1 -1
- package/dist/generators/templates/website-config.js +17 -11
- package/dist/generators/templates/website-config.js.map +1 -1
- package/dist/generators/templates/website-conversion.js +1 -1
- package/dist/generators/templates/website-conversion.js.map +1 -1
- package/dist/generators/templates/website-landing.js +1 -1
- package/dist/generators/templates/website-landing.js.map +1 -1
- package/dist/generators/templates/website-layout.d.ts +36 -4
- package/dist/generators/templates/website-layout.d.ts.map +1 -1
- package/dist/generators/templates/website-layout.js +466 -23
- package/dist/generators/templates/website-layout.js.map +1 -1
- package/dist/generators/templates/website-pricing.js +1 -1
- package/dist/generators/templates/website-pricing.js.map +1 -1
- package/dist/generators/templates/website-sections.js +1 -1
- package/dist/generators/templates/website-sections.js.map +1 -1
- package/dist/generators/templates/website-seo.d.ts.map +1 -1
- package/dist/generators/templates/website-seo.js +4 -1
- package/dist/generators/templates/website-seo.js.map +1 -1
- package/dist/generators/templates/website.d.ts +1 -1
- package/dist/generators/templates/website.d.ts.map +1 -1
- package/dist/generators/templates/website.js +1 -1
- package/dist/generators/templates/website.js.map +1 -1
- package/dist/generators/website-content-ai.d.ts +52 -0
- package/dist/generators/website-content-ai.d.ts.map +1 -0
- package/dist/generators/website-content-ai.js +141 -0
- package/dist/generators/website-content-ai.js.map +1 -0
- package/dist/generators/website-content-scanner.d.ts +1 -1
- package/dist/generators/website-content-scanner.d.ts.map +1 -1
- package/dist/generators/website-content-scanner.js +98 -1
- package/dist/generators/website-content-scanner.js.map +1 -1
- package/dist/generators/website-context.d.ts +34 -1
- package/dist/generators/website-context.d.ts.map +1 -1
- package/dist/generators/website-context.js +131 -9
- package/dist/generators/website-context.js.map +1 -1
- package/dist/generators/website-debug.d.ts +12 -0
- package/dist/generators/website-debug.d.ts.map +1 -1
- package/dist/generators/website-debug.js +16 -0
- package/dist/generators/website-debug.js.map +1 -1
- package/dist/generators/website.d.ts.map +1 -1
- package/dist/generators/website.js +26 -4
- package/dist/generators/website.js.map +1 -1
- package/dist/pipeline/auto-recovery.d.ts +56 -0
- package/dist/pipeline/auto-recovery.d.ts.map +1 -0
- package/dist/pipeline/auto-recovery.js +185 -0
- package/dist/pipeline/auto-recovery.js.map +1 -0
- package/dist/pipeline/change-request.d.ts +39 -0
- package/dist/pipeline/change-request.d.ts.map +1 -1
- package/dist/pipeline/change-request.js +40 -1
- package/dist/pipeline/change-request.js.map +1 -1
- package/dist/pipeline/check-runner.d.ts +30 -1
- package/dist/pipeline/check-runner.d.ts.map +1 -1
- package/dist/pipeline/check-runner.js +122 -1
- package/dist/pipeline/check-runner.js.map +1 -1
- package/dist/pipeline/command-resolver.d.ts.map +1 -1
- package/dist/pipeline/command-resolver.js +33 -2
- package/dist/pipeline/command-resolver.js.map +1 -1
- package/dist/pipeline/consensus/arbitrator-query.d.ts +22 -0
- package/dist/pipeline/consensus/arbitrator-query.d.ts.map +1 -0
- package/dist/pipeline/consensus/arbitrator-query.js +70 -0
- package/dist/pipeline/consensus/arbitrator-query.js.map +1 -0
- package/dist/pipeline/consensus/consensus-runner.d.ts +131 -7
- package/dist/pipeline/consensus/consensus-runner.d.ts.map +1 -1
- package/dist/pipeline/consensus/consensus-runner.js +809 -35
- package/dist/pipeline/consensus/consensus-runner.js.map +1 -1
- package/dist/pipeline/cr-lifecycle.d.ts +42 -0
- package/dist/pipeline/cr-lifecycle.d.ts.map +1 -0
- package/dist/pipeline/cr-lifecycle.js +89 -0
- package/dist/pipeline/cr-lifecycle.js.map +1 -0
- package/dist/pipeline/gate-engine.d.ts +1 -0
- package/dist/pipeline/gate-engine.d.ts.map +1 -1
- package/dist/pipeline/gate-engine.js +26 -7
- package/dist/pipeline/gate-engine.js.map +1 -1
- package/dist/pipeline/orchestrator.d.ts +1 -1
- package/dist/pipeline/orchestrator.d.ts.map +1 -1
- package/dist/pipeline/orchestrator.js +306 -16
- package/dist/pipeline/orchestrator.js.map +1 -1
- package/dist/pipeline/packets/consensus-packet-builder.d.ts +15 -4
- package/dist/pipeline/packets/consensus-packet-builder.d.ts.map +1 -1
- package/dist/pipeline/packets/consensus-packet-builder.js +29 -17
- package/dist/pipeline/packets/consensus-packet-builder.js.map +1 -1
- package/dist/pipeline/phases/architecture.d.ts.map +1 -1
- package/dist/pipeline/phases/architecture.js +5 -3
- package/dist/pipeline/phases/architecture.js.map +1 -1
- package/dist/pipeline/phases/audit.d.ts.map +1 -1
- package/dist/pipeline/phases/audit.js +5 -3
- package/dist/pipeline/phases/audit.js.map +1 -1
- package/dist/pipeline/phases/consensus-architecture.d.ts.map +1 -1
- package/dist/pipeline/phases/consensus-architecture.js +10 -1
- package/dist/pipeline/phases/consensus-architecture.js.map +1 -1
- package/dist/pipeline/phases/consensus-master-plan.d.ts.map +1 -1
- package/dist/pipeline/phases/consensus-master-plan.js +10 -3
- package/dist/pipeline/phases/consensus-master-plan.js.map +1 -1
- package/dist/pipeline/phases/consensus-role-plans.d.ts.map +1 -1
- package/dist/pipeline/phases/consensus-role-plans.js +10 -1
- package/dist/pipeline/phases/consensus-role-plans.js.map +1 -1
- package/dist/pipeline/phases/done.d.ts.map +1 -1
- package/dist/pipeline/phases/done.js +9 -4
- package/dist/pipeline/phases/done.js.map +1 -1
- package/dist/pipeline/phases/intake.d.ts.map +1 -1
- package/dist/pipeline/phases/intake.js +7 -3
- package/dist/pipeline/phases/intake.js.map +1 -1
- package/dist/pipeline/phases/phase-context.d.ts +2 -0
- package/dist/pipeline/phases/phase-context.d.ts.map +1 -1
- package/dist/pipeline/phases/phase-context.js +3 -1
- package/dist/pipeline/phases/phase-context.js.map +1 -1
- package/dist/pipeline/phases/production-gate.d.ts.map +1 -1
- package/dist/pipeline/phases/production-gate.js +28 -3
- package/dist/pipeline/phases/production-gate.js.map +1 -1
- package/dist/pipeline/phases/qa-validation.d.ts.map +1 -1
- package/dist/pipeline/phases/qa-validation.js +38 -5
- package/dist/pipeline/phases/qa-validation.js.map +1 -1
- package/dist/pipeline/phases/recovery-loop.d.ts +2 -0
- package/dist/pipeline/phases/recovery-loop.d.ts.map +1 -1
- package/dist/pipeline/phases/recovery-loop.js +200 -6
- package/dist/pipeline/phases/recovery-loop.js.map +1 -1
- package/dist/pipeline/phases/review.d.ts.map +1 -1
- package/dist/pipeline/phases/review.js +58 -28
- package/dist/pipeline/phases/review.js.map +1 -1
- package/dist/pipeline/phases/role-planning.d.ts.map +1 -1
- package/dist/pipeline/phases/role-planning.js +18 -2
- package/dist/pipeline/phases/role-planning.js.map +1 -1
- package/dist/pipeline/phases/stuck.d.ts.map +1 -1
- package/dist/pipeline/phases/stuck.js +10 -0
- package/dist/pipeline/phases/stuck.js.map +1 -1
- package/dist/pipeline/repo-snapshot.d.ts.map +1 -1
- package/dist/pipeline/repo-snapshot.js +3 -0
- package/dist/pipeline/repo-snapshot.js.map +1 -1
- package/dist/pipeline/role-execution-adapter.d.ts +2 -1
- package/dist/pipeline/role-execution-adapter.d.ts.map +1 -1
- package/dist/pipeline/role-execution-adapter.js +22 -7
- package/dist/pipeline/role-execution-adapter.js.map +1 -1
- package/dist/pipeline/skill-loader.d.ts +19 -0
- package/dist/pipeline/skill-loader.d.ts.map +1 -1
- package/dist/pipeline/skill-loader.js +22 -0
- package/dist/pipeline/skill-loader.js.map +1 -1
- package/dist/pipeline/skills/coverage-gate.d.ts +44 -0
- package/dist/pipeline/skills/coverage-gate.d.ts.map +1 -0
- package/dist/pipeline/skills/coverage-gate.js +143 -0
- package/dist/pipeline/skills/coverage-gate.js.map +1 -0
- package/dist/pipeline/skills/usage-registry.d.ts +48 -0
- package/dist/pipeline/skills/usage-registry.d.ts.map +1 -0
- package/dist/pipeline/skills/usage-registry.js +55 -0
- package/dist/pipeline/skills/usage-registry.js.map +1 -0
- package/dist/pipeline/strategy-context.d.ts +20 -0
- package/dist/pipeline/strategy-context.d.ts.map +1 -0
- package/dist/pipeline/strategy-context.js +55 -0
- package/dist/pipeline/strategy-context.js.map +1 -0
- package/dist/pipeline/type-defs/artifacts.d.ts +25 -5
- package/dist/pipeline/type-defs/artifacts.d.ts.map +1 -1
- package/dist/pipeline/type-defs/artifacts.js +4 -0
- package/dist/pipeline/type-defs/artifacts.js.map +1 -1
- package/dist/pipeline/type-defs/audit.d.ts +25 -13
- package/dist/pipeline/type-defs/audit.d.ts.map +1 -1
- package/dist/pipeline/type-defs/checks.d.ts +18 -8
- package/dist/pipeline/type-defs/checks.d.ts.map +1 -1
- package/dist/pipeline/type-defs/checks.js +4 -0
- package/dist/pipeline/type-defs/checks.js.map +1 -1
- package/dist/pipeline/type-defs/packets.d.ts +104 -18
- package/dist/pipeline/type-defs/packets.d.ts.map +1 -1
- package/dist/pipeline/type-defs/packets.js +17 -1
- package/dist/pipeline/type-defs/packets.js.map +1 -1
- package/dist/pipeline/type-defs/state.d.ts +160 -16
- package/dist/pipeline/type-defs/state.d.ts.map +1 -1
- package/dist/pipeline/type-defs/state.js +26 -1
- package/dist/pipeline/type-defs/state.js.map +1 -1
- package/dist/shared/text-utils.d.ts +23 -0
- package/dist/shared/text-utils.d.ts.map +1 -0
- package/dist/shared/text-utils.js +66 -0
- package/dist/shared/text-utils.js.map +1 -0
- package/dist/shared/website-strategy-format.d.ts +18 -0
- package/dist/shared/website-strategy-format.d.ts.map +1 -0
- package/dist/shared/website-strategy-format.js +47 -0
- package/dist/shared/website-strategy-format.js.map +1 -0
- package/dist/state/index.d.ts +2 -0
- package/dist/state/index.d.ts.map +1 -1
- package/dist/state/index.js +57 -8
- package/dist/state/index.js.map +1 -1
- package/dist/types/consensus.d.ts +1 -0
- package/dist/types/consensus.d.ts.map +1 -1
- package/dist/types/consensus.js.map +1 -1
- package/dist/types/website-strategy.d.ts +1 -1
- package/dist/types/workflow.d.ts +447 -0
- package/dist/types/workflow.d.ts.map +1 -1
- package/dist/types/workflow.js +3 -0
- package/dist/types/workflow.js.map +1 -1
- package/dist/upgrade/handlers.d.ts.map +1 -1
- package/dist/upgrade/handlers.js +6 -3
- package/dist/upgrade/handlers.js.map +1 -1
- package/dist/workflow/consensus.d.ts.map +1 -1
- package/dist/workflow/consensus.js +1 -0
- package/dist/workflow/consensus.js.map +1 -1
- package/dist/workflow/website-strategy.d.ts.map +1 -1
- package/dist/workflow/website-strategy.js +2 -29
- package/dist/workflow/website-strategy.js.map +1 -1
- package/dist/workflow/website-updater.d.ts.map +1 -1
- package/dist/workflow/website-updater.js +3 -2
- package/dist/workflow/website-updater.js.map +1 -1
- package/package.json +1 -1
- package/src/adapters/gemini.ts +51 -6
- package/src/adapters/grok.ts +51 -6
- package/src/adapters/openai.ts +53 -5
- package/src/cli/commands/create.ts +1 -1
- package/src/cli/interactive.ts +333 -19
- package/src/generators/all.ts +3 -2
- package/src/generators/doc-parser.ts +75 -15
- package/src/generators/templates/fullstack.ts +1 -1
- package/src/generators/templates/website-components.ts +1 -1
- package/src/generators/templates/website-config.ts +23 -11
- package/src/generators/templates/website-conversion.ts +1 -1
- package/src/generators/templates/website-landing.ts +1 -1
- package/src/generators/templates/website-layout.ts +491 -23
- package/src/generators/templates/website-pricing.ts +1 -1
- package/src/generators/templates/website-sections.ts +1 -1
- package/src/generators/templates/website-seo.ts +4 -1
- package/src/generators/templates/website.ts +3 -0
- package/src/generators/website-content-ai.ts +186 -0
- package/src/generators/website-content-scanner.ts +113 -1
- package/src/generators/website-context.ts +151 -12
- package/src/generators/website-debug.ts +26 -0
- package/src/generators/website.ts +28 -3
- package/src/pipeline/auto-recovery.ts +283 -0
- package/src/pipeline/change-request.ts +63 -1
- package/src/pipeline/check-runner.ts +141 -2
- package/src/pipeline/command-resolver.ts +34 -2
- package/src/pipeline/consensus/arbitrator-query.ts +101 -0
- package/src/pipeline/consensus/consensus-runner.ts +1099 -42
- package/src/pipeline/cr-lifecycle.ts +103 -0
- package/src/pipeline/gate-engine.ts +35 -7
- package/src/pipeline/orchestrator.ts +361 -16
- package/src/pipeline/packets/consensus-packet-builder.ts +44 -18
- package/src/pipeline/phases/architecture.ts +6 -3
- package/src/pipeline/phases/audit.ts +6 -3
- package/src/pipeline/phases/consensus-architecture.ts +10 -1
- package/src/pipeline/phases/consensus-master-plan.ts +10 -3
- package/src/pipeline/phases/consensus-role-plans.ts +10 -1
- package/src/pipeline/phases/done.ts +15 -4
- package/src/pipeline/phases/intake.ts +7 -3
- package/src/pipeline/phases/phase-context.ts +6 -1
- package/src/pipeline/phases/production-gate.ts +41 -3
- package/src/pipeline/phases/qa-validation.ts +51 -5
- package/src/pipeline/phases/recovery-loop.ts +229 -7
- package/src/pipeline/phases/review.ts +73 -30
- package/src/pipeline/phases/role-planning.ts +21 -2
- package/src/pipeline/phases/stuck.ts +10 -0
- package/src/pipeline/repo-snapshot.ts +3 -0
- package/src/pipeline/role-execution-adapter.ts +30 -4
- package/src/pipeline/skill-loader.ts +33 -0
- package/src/pipeline/skills/coverage-gate.ts +199 -0
- package/src/pipeline/skills/usage-registry.ts +87 -0
- package/src/pipeline/strategy-context.ts +60 -0
- package/src/pipeline/type-defs/artifacts.ts +4 -0
- package/src/pipeline/type-defs/checks.ts +4 -0
- package/src/pipeline/type-defs/packets.ts +18 -1
- package/src/pipeline/type-defs/state.ts +26 -1
- package/src/shared/text-utils.ts +70 -0
- package/src/shared/website-strategy-format.ts +56 -0
- package/src/state/index.ts +60 -8
- package/src/types/consensus.ts +1 -0
- package/src/types/workflow.ts +6 -0
- package/src/upgrade/handlers.ts +9 -3
- package/src/workflow/consensus.ts +1 -0
- package/src/workflow/website-strategy.ts +2 -36
- package/src/workflow/website-updater.ts +4 -2
- package/tests/adapters/gemini.test.ts +165 -0
- package/tests/adapters/grok.test.ts +137 -0
- package/tests/adapters/openai.test.ts +128 -0
- package/tests/generators/doc-parser.test.ts +88 -9
- package/tests/generators/quality-gate.test.ts +19 -3
- package/tests/generators/website-components.test.ts +34 -0
- package/tests/generators/website-content-ai.test.ts +308 -0
- package/tests/generators/website-content-scanner.test.ts +86 -0
- package/tests/generators/website-context.test.ts +3 -2
- package/tests/integration/smokestack-scaffold.test.ts +385 -0
- package/tests/pipeline/auto-recovery.test.ts +337 -0
- package/tests/pipeline/change-request.test.ts +70 -0
- package/tests/pipeline/command-resolver.test.ts +42 -0
- package/tests/pipeline/consensus/arbitrator-query.test.ts +107 -0
- package/tests/pipeline/consensus-runner.test.ts +1333 -10
- package/tests/pipeline/consensus-scoring.test.ts +602 -18
- package/tests/pipeline/gate-engine.test.ts +34 -0
- package/tests/pipeline/install-check.test.ts +261 -0
- package/tests/pipeline/orchestrator.test.ts +1506 -15
- package/tests/pipeline/packets/builders.test.ts +29 -6
- package/tests/pipeline/phases/role-planning.strategy.test.ts +204 -0
- package/tests/pipeline/pipeline-persistence.test.ts +230 -0
- package/tests/pipeline/recovery-loop-guidance.test.ts +280 -0
- package/tests/pipeline/role-execution-adapter.test.ts +88 -0
- package/tests/pipeline/skills/coverage-gate.test.ts +370 -0
- package/tests/pipeline/skills/usage-registry.test.ts +114 -0
- package/tests/pipeline/strategy-context.test.ts +148 -0
- package/tests/shared/text-utils.test.ts +155 -0
- package/tests/state/progress-analysis.test.ts +375 -0
- package/tests/upgrade/handlers.test.ts +33 -2
- package/tests/workflow/consensus.test.ts +6 -0
- package/tsconfig.json +1 -1
|
@@ -1,11 +1,25 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Consensus Runner tests — vote aggregation, packet construction,
|
|
3
|
-
* prompt building
|
|
3
|
+
* prompt building, normalization wiring, arbitration triggers.
|
|
4
|
+
* (LLM calls are not tested here.)
|
|
4
5
|
*/
|
|
5
6
|
|
|
6
|
-
import { describe, it, expect } from 'vitest';
|
|
7
|
+
import { describe, it, expect, vi } from 'vitest';
|
|
8
|
+
import * as fs from 'node:fs';
|
|
9
|
+
import * as path from 'node:path';
|
|
10
|
+
import * as os from 'node:os';
|
|
7
11
|
import {
|
|
8
12
|
buildReviewPrompt,
|
|
13
|
+
mapVote,
|
|
14
|
+
hasVoteDisagreement,
|
|
15
|
+
normalizeVoteBlockers,
|
|
16
|
+
DEFAULT_CONDITIONAL_FLOOR,
|
|
17
|
+
ConsensusRunner,
|
|
18
|
+
parseRawReviewResponse,
|
|
19
|
+
parseArbitratorResponse,
|
|
20
|
+
loadPlanContent,
|
|
21
|
+
correctConfidenceContradiction,
|
|
22
|
+
getArbitrationTrigger,
|
|
9
23
|
} from '../../src/pipeline/consensus/consensus-runner.js';
|
|
10
24
|
import {
|
|
11
25
|
buildConsensusPacket,
|
|
@@ -105,7 +119,7 @@ describe('ConsensusRunner', () => {
|
|
|
105
119
|
expect(prompt).toContain('Auth strategy?');
|
|
106
120
|
});
|
|
107
121
|
|
|
108
|
-
it('should include review instructions', () => {
|
|
122
|
+
it('should include review instructions with scoring guide', () => {
|
|
109
123
|
const packet = makePlanPacket();
|
|
110
124
|
const prompt = buildReviewPrompt(packet);
|
|
111
125
|
|
|
@@ -113,6 +127,92 @@ describe('ConsensusRunner', () => {
|
|
|
113
127
|
expect(prompt).toContain('REJECT');
|
|
114
128
|
expect(prompt).toContain('CONDITIONAL');
|
|
115
129
|
expect(prompt).toContain('Completeness');
|
|
130
|
+
expect(prompt).toContain('Scoring Guide');
|
|
131
|
+
expect(prompt).toContain('[BLOCKER]');
|
|
132
|
+
expect(prompt).toContain('[REQUIRED]');
|
|
133
|
+
expect(prompt).toContain('[SUGGESTION]');
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
it('should include revision notice for version > 1 (v2.4.2)', () => {
|
|
137
|
+
const packet = makePlanPacket({
|
|
138
|
+
metadata: {
|
|
139
|
+
packet_id: 'plan-2',
|
|
140
|
+
timestamp: new Date().toISOString(),
|
|
141
|
+
phase: 'INTAKE',
|
|
142
|
+
submitted_by: 'DISPATCHER',
|
|
143
|
+
version: 2,
|
|
144
|
+
},
|
|
145
|
+
});
|
|
146
|
+
const prompt = buildReviewPrompt(packet);
|
|
147
|
+
|
|
148
|
+
expect(prompt).toContain('Revision Notice');
|
|
149
|
+
expect(prompt).toContain('prior issues');
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
it('should NOT include revision notice for version 1', () => {
|
|
153
|
+
const packet = makePlanPacket();
|
|
154
|
+
const prompt = buildReviewPrompt(packet);
|
|
155
|
+
|
|
156
|
+
expect(prompt).not.toContain('Revision Notice');
|
|
157
|
+
});
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
describe('mapVote', () => {
|
|
161
|
+
it('should APPROVE when confidence meets threshold', () => {
|
|
162
|
+
expect(mapVote(0.96, 0.95)).toBe('APPROVE');
|
|
163
|
+
expect(mapVote(0.95, 0.95)).toBe('APPROVE');
|
|
164
|
+
expect(mapVote(1.0, 0.95)).toBe('APPROVE');
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
it('should CONDITIONAL for floor to threshold', () => {
|
|
168
|
+
expect(mapVote(0.94, 0.95)).toBe('CONDITIONAL');
|
|
169
|
+
expect(mapVote(0.90, 0.95)).toBe('CONDITIONAL');
|
|
170
|
+
expect(mapVote(0.85, 0.95)).toBe('CONDITIONAL');
|
|
171
|
+
expect(mapVote(0.80, 0.95)).toBe('CONDITIONAL');
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
it('should REJECT below floor', () => {
|
|
175
|
+
expect(mapVote(0.79, 0.95)).toBe('REJECT');
|
|
176
|
+
expect(mapVote(0.50, 0.95)).toBe('REJECT');
|
|
177
|
+
expect(mapVote(0.0, 0.95)).toBe('REJECT');
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
it('should respect custom thresholds', () => {
|
|
181
|
+
expect(mapVote(0.90, 0.90)).toBe('APPROVE');
|
|
182
|
+
expect(mapVote(0.85, 0.90)).toBe('CONDITIONAL');
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
it('should clamp out-of-range inputs', () => {
|
|
186
|
+
expect(mapVote(1.5, 0.95)).toBe('APPROVE');
|
|
187
|
+
expect(mapVote(-0.1, 0.95)).toBe('REJECT');
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
it('should handle conditionalFloor > threshold by clamping floor', () => {
|
|
191
|
+
expect(mapVote(0.90, 0.85, 0.95)).toBe('APPROVE');
|
|
192
|
+
});
|
|
193
|
+
|
|
194
|
+
it('should export DEFAULT_CONDITIONAL_FLOOR as 0.80', () => {
|
|
195
|
+
expect(DEFAULT_CONDITIONAL_FLOOR).toBe(0.80);
|
|
196
|
+
});
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
describe('hasVoteDisagreement', () => {
|
|
200
|
+
it('should return false for single vote', () => {
|
|
201
|
+
expect(hasVoteDisagreement([makeVote('r1', 'APPROVE')])).toBe(false);
|
|
202
|
+
});
|
|
203
|
+
|
|
204
|
+
it('should return false for unanimous votes', () => {
|
|
205
|
+
expect(hasVoteDisagreement([
|
|
206
|
+
makeVote('r1', 'APPROVE'),
|
|
207
|
+
makeVote('r2', 'APPROVE'),
|
|
208
|
+
])).toBe(false);
|
|
209
|
+
});
|
|
210
|
+
|
|
211
|
+
it('should return true for mixed votes', () => {
|
|
212
|
+
expect(hasVoteDisagreement([
|
|
213
|
+
makeVote('r1', 'APPROVE'),
|
|
214
|
+
makeVote('r2', 'REJECT'),
|
|
215
|
+
])).toBe(true);
|
|
116
216
|
});
|
|
117
217
|
});
|
|
118
218
|
|
|
@@ -120,7 +220,10 @@ describe('ConsensusRunner', () => {
|
|
|
120
220
|
it('should approve when all reviewers approve with sufficient quorum', () => {
|
|
121
221
|
const packet = buildConsensusPacket({
|
|
122
222
|
planPacketRef: makeRef(),
|
|
123
|
-
votes: [
|
|
223
|
+
votes: [
|
|
224
|
+
makeVote('r1', 'APPROVE', 0.96),
|
|
225
|
+
makeVote('r2', 'APPROVE', 0.97),
|
|
226
|
+
],
|
|
124
227
|
rules: { threshold: 0.95, quorum: 2, min_reviewers: 2 },
|
|
125
228
|
});
|
|
126
229
|
|
|
@@ -142,9 +245,9 @@ describe('ConsensusRunner', () => {
|
|
|
142
245
|
|
|
143
246
|
it('should handle multi-provider votes', () => {
|
|
144
247
|
const votes: ReviewerVote[] = [
|
|
145
|
-
{ ...makeVote('r1', 'APPROVE'), provider: 'openai', model: 'gpt-4o' },
|
|
146
|
-
{ ...makeVote('r2', 'APPROVE'), provider: 'gemini', model: 'gemini-2.0-flash' },
|
|
147
|
-
{ ...makeVote('r3', 'APPROVE'), provider: 'grok', model: 'grok-3' },
|
|
248
|
+
{ ...makeVote('r1', 'APPROVE', 0.96), provider: 'openai', model: 'gpt-4o' },
|
|
249
|
+
{ ...makeVote('r2', 'APPROVE', 0.97), provider: 'gemini', model: 'gemini-2.0-flash' },
|
|
250
|
+
{ ...makeVote('r3', 'APPROVE', 0.98), provider: 'grok', model: 'grok-3' },
|
|
148
251
|
];
|
|
149
252
|
|
|
150
253
|
const packet = buildConsensusPacket({
|
|
@@ -164,7 +267,6 @@ describe('ConsensusRunner', () => {
|
|
|
164
267
|
rules: { threshold: 0.95, quorum: 2, min_reviewers: 2 },
|
|
165
268
|
});
|
|
166
269
|
|
|
167
|
-
// CONDITIONAL is not APPROVE, so score = 0.5
|
|
168
270
|
expect(packet.consensus_result.score).toBe(0.5);
|
|
169
271
|
expect(packet.final_status).toBe('REJECTED');
|
|
170
272
|
});
|
|
@@ -183,11 +285,10 @@ describe('ConsensusRunner', () => {
|
|
|
183
285
|
it('should reject when quorum not met', () => {
|
|
184
286
|
const packet = buildConsensusPacket({
|
|
185
287
|
planPacketRef: makeRef(),
|
|
186
|
-
votes: [makeVote('r1', 'APPROVE')],
|
|
288
|
+
votes: [makeVote('r1', 'APPROVE', 0.96)],
|
|
187
289
|
rules: { threshold: 0.5, quorum: 2, min_reviewers: 2 },
|
|
188
290
|
});
|
|
189
291
|
|
|
190
|
-
// 1 approver, score = 1.0, but quorum = 2, only 1 voter
|
|
191
292
|
expect(packet.consensus_result.approved).toBe(false);
|
|
192
293
|
});
|
|
193
294
|
|
|
@@ -203,4 +304,1226 @@ describe('ConsensusRunner', () => {
|
|
|
203
304
|
expect(packet.metadata.plan_packet_id).toBe(planRef.artifact_id);
|
|
204
305
|
});
|
|
205
306
|
});
|
|
307
|
+
|
|
308
|
+
describe('arbitration triggers', () => {
|
|
309
|
+
it('triggers arbitration on vote disagreement when enableArbitration=true', () => {
|
|
310
|
+
// We test the shouldArbitrate logic indirectly through normalizedVotes + hasVoteDisagreement
|
|
311
|
+
const votes = [
|
|
312
|
+
makeVote('r1', 'APPROVE', 0.96),
|
|
313
|
+
makeVote('r2', 'REJECT', 0.5),
|
|
314
|
+
];
|
|
315
|
+
expect(hasVoteDisagreement(votes)).toBe(true);
|
|
316
|
+
|
|
317
|
+
// The actual arbitration call requires LLM, so we verify the condition only
|
|
318
|
+
const runner = new ConsensusRunner({
|
|
319
|
+
mode: 'independent',
|
|
320
|
+
minReviewers: 2,
|
|
321
|
+
threshold: 0.95,
|
|
322
|
+
quorum: 2,
|
|
323
|
+
projectDir: '/tmp/test',
|
|
324
|
+
enableArbitration: true,
|
|
325
|
+
arbitratorProvider: { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.2 },
|
|
326
|
+
});
|
|
327
|
+
// Verify config is set
|
|
328
|
+
expect(runner).toBeDefined();
|
|
329
|
+
});
|
|
330
|
+
|
|
331
|
+
it('does not trigger when enableArbitration=false', () => {
|
|
332
|
+
const runner = new ConsensusRunner({
|
|
333
|
+
mode: 'independent',
|
|
334
|
+
minReviewers: 2,
|
|
335
|
+
threshold: 0.95,
|
|
336
|
+
quorum: 2,
|
|
337
|
+
projectDir: '/tmp/test',
|
|
338
|
+
enableArbitration: false,
|
|
339
|
+
});
|
|
340
|
+
// No arbitratorProvider means no arbitration call
|
|
341
|
+
expect(runner).toBeDefined();
|
|
342
|
+
});
|
|
343
|
+
|
|
344
|
+
it('triggers on "death by conditional" (all CONDITIONAL, avg conf >= 0.94, required_changes <= 3)', () => {
|
|
345
|
+
const votes: ReviewerVote[] = [
|
|
346
|
+
{
|
|
347
|
+
...makeVote('r1', 'CONDITIONAL', 0.94),
|
|
348
|
+
blocking_issues: [],
|
|
349
|
+
required_changes: ['Add error handling'],
|
|
350
|
+
},
|
|
351
|
+
{
|
|
352
|
+
...makeVote('r2', 'CONDITIONAL', 0.95),
|
|
353
|
+
blocking_issues: [],
|
|
354
|
+
required_changes: ['Add input validation'],
|
|
355
|
+
},
|
|
356
|
+
];
|
|
357
|
+
|
|
358
|
+
const allConditional = votes.every(v => v.vote === 'CONDITIONAL');
|
|
359
|
+
const avgConf = votes.reduce((s, v) => s + v.confidence, 0) / votes.length;
|
|
360
|
+
const totalRequired = votes.reduce((s, v) => s + (v.required_changes?.length ?? 0), 0);
|
|
361
|
+
|
|
362
|
+
expect(allConditional).toBe(true);
|
|
363
|
+
expect(avgConf).toBeGreaterThanOrEqual(0.94);
|
|
364
|
+
expect(totalRequired).toBeLessThanOrEqual(3);
|
|
365
|
+
});
|
|
366
|
+
|
|
367
|
+
it('does NOT trigger "death by conditional" when required_changes > 3', () => {
|
|
368
|
+
const votes: ReviewerVote[] = [
|
|
369
|
+
{
|
|
370
|
+
...makeVote('r1', 'CONDITIONAL', 0.94),
|
|
371
|
+
blocking_issues: [],
|
|
372
|
+
required_changes: ['Fix A', 'Fix B'],
|
|
373
|
+
},
|
|
374
|
+
{
|
|
375
|
+
...makeVote('r2', 'CONDITIONAL', 0.95),
|
|
376
|
+
blocking_issues: [],
|
|
377
|
+
required_changes: ['Fix C', 'Fix D'],
|
|
378
|
+
},
|
|
379
|
+
];
|
|
380
|
+
|
|
381
|
+
const totalRequired = votes.reduce((s, v) => s + (v.required_changes?.length ?? 0), 0);
|
|
382
|
+
expect(totalRequired).toBe(4);
|
|
383
|
+
expect(totalRequired).toBeGreaterThan(3);
|
|
384
|
+
});
|
|
385
|
+
|
|
386
|
+
it('v2.4.2: caps at 1 attempt per phase+version (version-keyed tracking)', () => {
|
|
387
|
+
const runner = new ConsensusRunner({
|
|
388
|
+
mode: 'independent',
|
|
389
|
+
minReviewers: 2,
|
|
390
|
+
threshold: 0.95,
|
|
391
|
+
quorum: 2,
|
|
392
|
+
projectDir: '/tmp/test',
|
|
393
|
+
enableArbitration: true,
|
|
394
|
+
arbitratorProvider: { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.2 },
|
|
395
|
+
});
|
|
396
|
+
|
|
397
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
398
|
+
const attempted = (runner as any).arbitrationAttempted as Set<string>;
|
|
399
|
+
attempted.add('CONSENSUS_MASTER_PLAN@v1');
|
|
400
|
+
expect(attempted.has('CONSENSUS_MASTER_PLAN@v1')).toBe(true);
|
|
401
|
+
// Same phase with new version is NOT blocked
|
|
402
|
+
expect(attempted.has('CONSENSUS_MASTER_PLAN@v2')).toBe(false);
|
|
403
|
+
// Different phase is NOT blocked
|
|
404
|
+
expect(attempted.has('CONSENSUS_ARCHITECTURE@v1')).toBe(false);
|
|
405
|
+
});
|
|
406
|
+
|
|
407
|
+
it('ARBITRATED packet includes arbitrator_result', () => {
|
|
408
|
+
const packet = buildConsensusPacket({
|
|
409
|
+
planPacketRef: makeRef(),
|
|
410
|
+
votes: [makeVote('r1', 'CONDITIONAL', 0.88)],
|
|
411
|
+
rules: { threshold: 0.95, quorum: 1, min_reviewers: 1 },
|
|
412
|
+
arbitratorResult: {
|
|
413
|
+
decision: 'Plan is execution-ready with minor amendments',
|
|
414
|
+
merged_patch: 'Add error handling to endpoint /api/users',
|
|
415
|
+
},
|
|
416
|
+
});
|
|
417
|
+
|
|
418
|
+
expect(packet.final_status).toBe('ARBITRATED');
|
|
419
|
+
expect(packet.arbitrator_result).toBeDefined();
|
|
420
|
+
expect(packet.arbitrator_result?.decision).toContain('execution-ready');
|
|
421
|
+
expect(packet.arbitrator_result?.merged_patch).toContain('error handling');
|
|
422
|
+
});
|
|
423
|
+
});
|
|
424
|
+
|
|
425
|
+
describe('parseRawReviewResponse — JSON parsing', () => {
|
|
426
|
+
it('parses JSON response with APPROVE and 0.96 confidence', () => {
|
|
427
|
+
const raw = JSON.stringify({
|
|
428
|
+
vote: 'APPROVE',
|
|
429
|
+
confidence: 0.96,
|
|
430
|
+
blocking_issues: [],
|
|
431
|
+
required_changes: [],
|
|
432
|
+
suggestions: ['Consider adding rate limiting'],
|
|
433
|
+
analysis: 'Plan looks solid.',
|
|
434
|
+
});
|
|
435
|
+
const result = parseRawReviewResponse(raw);
|
|
436
|
+
|
|
437
|
+
expect(result.confidence).toBe(0.96);
|
|
438
|
+
expect(result.modelVote).toBe('APPROVE');
|
|
439
|
+
expect(result.blockingIssues).toEqual([]);
|
|
440
|
+
expect(result.suggestions).toEqual(['Consider adding rate limiting']);
|
|
441
|
+
});
|
|
442
|
+
|
|
443
|
+
it('parses JSON wrapped in markdown code fences', () => {
|
|
444
|
+
const raw = '```json\n' + JSON.stringify({
|
|
445
|
+
vote: 'CONDITIONAL',
|
|
446
|
+
confidence: 0.88,
|
|
447
|
+
blocking_issues: [],
|
|
448
|
+
required_changes: ['[REQUIRED] Add error handling'],
|
|
449
|
+
suggestions: [],
|
|
450
|
+
analysis: 'Needs work.',
|
|
451
|
+
}) + '\n```';
|
|
452
|
+
const result = parseRawReviewResponse(raw);
|
|
453
|
+
|
|
454
|
+
expect(result.confidence).toBe(0.88);
|
|
455
|
+
expect(result.modelVote).toBe('CONDITIONAL');
|
|
456
|
+
expect(result.requiredChanges).toEqual(['[REQUIRED] Add error handling']);
|
|
457
|
+
});
|
|
458
|
+
|
|
459
|
+
it('parses JSON wrapped in code fences without json label', () => {
|
|
460
|
+
const raw = '```\n' + JSON.stringify({
|
|
461
|
+
vote: 'REJECT',
|
|
462
|
+
confidence: 0.45,
|
|
463
|
+
blocking_issues: ['[BLOCKER] Missing auth'],
|
|
464
|
+
required_changes: [],
|
|
465
|
+
suggestions: [],
|
|
466
|
+
}) + '\n```';
|
|
467
|
+
const result = parseRawReviewResponse(raw);
|
|
468
|
+
|
|
469
|
+
expect(result.confidence).toBe(0.45);
|
|
470
|
+
expect(result.modelVote).toBe('REJECT');
|
|
471
|
+
expect(result.blockingIssues).toEqual(['[BLOCKER] Missing auth']);
|
|
472
|
+
});
|
|
473
|
+
|
|
474
|
+
it('returns null for invalid JSON and falls back to regex', () => {
|
|
475
|
+
const raw = 'This is not JSON but APPROVE with CONFIDENCE: 0.92';
|
|
476
|
+
const result = parseRawReviewResponse(raw);
|
|
477
|
+
|
|
478
|
+
// APPROVE + 0.92 is in [0.80, 0.95) range -> corrected to 0.95 by contradiction detector
|
|
479
|
+
expect(result.confidence).toBe(0.95);
|
|
480
|
+
expect(result.modelVote).toBe('APPROVE');
|
|
481
|
+
});
|
|
482
|
+
});
|
|
483
|
+
|
|
484
|
+
describe('parseRawReviewResponse — regex fallback', () => {
|
|
485
|
+
it('parses CONDITIONAL with 0.88 confidence via regex', () => {
|
|
486
|
+
const raw = `VOTE: CONDITIONAL
|
|
487
|
+
CONFIDENCE: 0.88
|
|
488
|
+
|
|
489
|
+
REQUIRED CHANGES:
|
|
490
|
+
- Add input validation
|
|
491
|
+
- Improve error messages
|
|
492
|
+
|
|
493
|
+
SUGGESTIONS:
|
|
494
|
+
- Consider caching`;
|
|
495
|
+
|
|
496
|
+
const result = parseRawReviewResponse(raw);
|
|
497
|
+
expect(result.confidence).toBe(0.88);
|
|
498
|
+
expect(result.modelVote).toBe('CONDITIONAL');
|
|
499
|
+
});
|
|
500
|
+
|
|
501
|
+
it('parses REJECT with blocking issues via regex', () => {
|
|
502
|
+
const raw = `VOTE: REJECT
|
|
503
|
+
CONFIDENCE: 0.55
|
|
504
|
+
|
|
505
|
+
[BLOCKER] Missing authentication
|
|
506
|
+
[BLOCKER] No rate limiting
|
|
507
|
+
[SUGGESTION] Add logging`;
|
|
508
|
+
|
|
509
|
+
const result = parseRawReviewResponse(raw);
|
|
510
|
+
expect(result.confidence).toBe(0.55);
|
|
511
|
+
expect(result.modelVote).toBe('REJECT');
|
|
512
|
+
expect(result.blockingIssues).toContain('Missing authentication');
|
|
513
|
+
expect(result.blockingIssues).toContain('No rate limiting');
|
|
514
|
+
expect(result.suggestions).toContain('Add logging');
|
|
515
|
+
});
|
|
516
|
+
|
|
517
|
+
it('handles CONSENSUS: XX% fallback format', () => {
|
|
518
|
+
const raw = `ANALYSIS: This plan looks good.
|
|
519
|
+
CONSENSUS: 92%`;
|
|
520
|
+
|
|
521
|
+
const result = parseRawReviewResponse(raw);
|
|
522
|
+
expect(result.confidence).toBe(0.92);
|
|
523
|
+
});
|
|
524
|
+
|
|
525
|
+
it('extracts [BLOCKER], [REQUIRED], [SUGGESTION] tagged items', () => {
|
|
526
|
+
const raw = `VOTE: REJECT
|
|
527
|
+
CONFIDENCE: 0.40
|
|
528
|
+
[BLOCKER] SQL injection vulnerability in user input handling
|
|
529
|
+
[REQUIRED] Add input sanitization
|
|
530
|
+
[SUGGESTION] Consider using parameterized queries throughout`;
|
|
531
|
+
|
|
532
|
+
const result = parseRawReviewResponse(raw);
|
|
533
|
+
expect(result.blockingIssues).toContain('SQL injection vulnerability in user input handling');
|
|
534
|
+
expect(result.requiredChanges).toContain('Add input sanitization');
|
|
535
|
+
expect(result.suggestions).toContain('Consider using parameterized queries throughout');
|
|
536
|
+
});
|
|
537
|
+
|
|
538
|
+
it('confidence > 1 is treated as percentage and normalized', () => {
|
|
539
|
+
const raw = 'CONFIDENCE: 92';
|
|
540
|
+
const result = parseRawReviewResponse(raw);
|
|
541
|
+
expect(result.confidence).toBe(0.92);
|
|
542
|
+
});
|
|
543
|
+
|
|
544
|
+
it('defaults to confidence 0 when no parseable score', () => {
|
|
545
|
+
const raw = 'This plan is mediocre.';
|
|
546
|
+
const result = parseRawReviewResponse(raw);
|
|
547
|
+
expect(result.confidence).toBe(0);
|
|
548
|
+
});
|
|
549
|
+
|
|
550
|
+
it('extracts vote even when mixed with other text', () => {
|
|
551
|
+
const raw = 'After careful analysis, I believe the plan deserves APPROVE. Confidence: 0.97';
|
|
552
|
+
const result = parseRawReviewResponse(raw);
|
|
553
|
+
expect(result.modelVote).toBe('APPROVE');
|
|
554
|
+
expect(result.confidence).toBe(0.97);
|
|
555
|
+
});
|
|
556
|
+
|
|
557
|
+
it('extracts confidence from truncated JSON (quotes around key)', () => {
|
|
558
|
+
// Simulates a truncated JSON response where JSON.parse fails but the
|
|
559
|
+
// regex fallback should still extract confidence from "confidence": 0.88
|
|
560
|
+
const raw = '```json\n{"vote": "CONDITIONAL", "confidence": 0.88, "blocking_issues": [], "required_changes": ["[REQUIRED] Add...';
|
|
561
|
+
const result = parseRawReviewResponse(raw);
|
|
562
|
+
expect(result.modelVote).toBe('CONDITIONAL');
|
|
563
|
+
expect(result.confidence).toBe(0.88);
|
|
564
|
+
});
|
|
565
|
+
|
|
566
|
+
it('handles numbered bullet lists in sections', () => {
|
|
567
|
+
const raw = `VOTE: CONDITIONAL
|
|
568
|
+
CONFIDENCE: 0.85
|
|
569
|
+
|
|
570
|
+
REQUIRED CHANGES:
|
|
571
|
+
1. Add error handling
|
|
572
|
+
2. Improve validation
|
|
573
|
+
3. Fix API route naming`;
|
|
574
|
+
|
|
575
|
+
const result = parseRawReviewResponse(raw);
|
|
576
|
+
expect(result.requiredChanges).toHaveLength(3);
|
|
577
|
+
expect(result.requiredChanges).toContain('Add error handling');
|
|
578
|
+
});
|
|
579
|
+
});
|
|
580
|
+
|
|
581
|
+
describe('governance rule: vote derived from confidence', () => {
|
|
582
|
+
it('modelVote APPROVE with confidence 0.93 -> derived vote is CONDITIONAL', () => {
|
|
583
|
+
// Simulate what spawnSingleReviewer does:
|
|
584
|
+
// model says APPROVE but confidence 0.93 < 0.95 threshold
|
|
585
|
+
const confidence = 0.93;
|
|
586
|
+
const threshold = 0.95;
|
|
587
|
+
const derived = mapVote(confidence, threshold);
|
|
588
|
+
const modelVote = 'APPROVE';
|
|
589
|
+
|
|
590
|
+
expect(derived).toBe('CONDITIONAL');
|
|
591
|
+
expect(modelVote).not.toBe(derived);
|
|
592
|
+
});
|
|
593
|
+
|
|
594
|
+
it('modelVote REJECT with confidence 0.96 -> derived vote is APPROVE', () => {
|
|
595
|
+
// model says REJECT but confidence 0.96 >= 0.95 threshold
|
|
596
|
+
const confidence = 0.96;
|
|
597
|
+
const threshold = 0.95;
|
|
598
|
+
const derived = mapVote(confidence, threshold);
|
|
599
|
+
const modelVote = 'REJECT';
|
|
600
|
+
|
|
601
|
+
expect(derived).toBe('APPROVE');
|
|
602
|
+
expect(modelVote).not.toBe(derived);
|
|
603
|
+
});
|
|
604
|
+
|
|
605
|
+
it('reviewer_inconsistency is true when model and derived disagree', () => {
|
|
606
|
+
const confidence = 0.93;
|
|
607
|
+
const threshold = 0.95;
|
|
608
|
+
const derived = mapVote(confidence, threshold);
|
|
609
|
+
const modelVote = 'APPROVE';
|
|
610
|
+
const reviewer_inconsistency = modelVote !== null && modelVote !== derived;
|
|
611
|
+
|
|
612
|
+
expect(reviewer_inconsistency).toBe(true);
|
|
613
|
+
});
|
|
614
|
+
|
|
615
|
+
it('reviewer_inconsistency is false when model and derived agree', () => {
|
|
616
|
+
const confidence = 0.96;
|
|
617
|
+
const threshold = 0.95;
|
|
618
|
+
const derived = mapVote(confidence, threshold);
|
|
619
|
+
const modelVote = 'APPROVE';
|
|
620
|
+
const reviewer_inconsistency = modelVote !== null && modelVote !== derived;
|
|
621
|
+
|
|
622
|
+
expect(reviewer_inconsistency).toBe(false);
|
|
623
|
+
});
|
|
624
|
+
|
|
625
|
+
it('vote derivation always uses mapVote regardless of modelVote', () => {
|
|
626
|
+
// Even if modelVote is null, derived should still work
|
|
627
|
+
const confidence = 0.50;
|
|
628
|
+
const threshold = 0.95;
|
|
629
|
+
const derived = mapVote(confidence, threshold);
|
|
630
|
+
|
|
631
|
+
expect(derived).toBe('REJECT');
|
|
632
|
+
});
|
|
633
|
+
});
|
|
634
|
+
|
|
635
|
+
describe('buildReviewPrompt — JSON response format', () => {
|
|
636
|
+
it('should request JSON response format', () => {
|
|
637
|
+
const packet = makePlanPacket();
|
|
638
|
+
const prompt = buildReviewPrompt(packet);
|
|
639
|
+
|
|
640
|
+
expect(prompt).toContain('Return ONLY a JSON object');
|
|
641
|
+
expect(prompt).toContain('"vote"');
|
|
642
|
+
expect(prompt).toContain('"confidence"');
|
|
643
|
+
expect(prompt).toContain('"blocking_issues"');
|
|
644
|
+
expect(prompt).toContain('"required_changes"');
|
|
645
|
+
expect(prompt).toContain('"suggestions"');
|
|
646
|
+
});
|
|
647
|
+
|
|
648
|
+
it('should include confidence scale guidance', () => {
|
|
649
|
+
const packet = makePlanPacket();
|
|
650
|
+
const prompt = buildReviewPrompt(packet);
|
|
651
|
+
|
|
652
|
+
expect(prompt).toContain('0.95-1.00: APPROVE');
|
|
653
|
+
expect(prompt).toContain('0.80-0.94: CONDITIONAL');
|
|
654
|
+
expect(prompt).toContain('Below 0.80: REJECT');
|
|
655
|
+
});
|
|
656
|
+
|
|
657
|
+
it('should not contain old "Respond with" format', () => {
|
|
658
|
+
const packet = makePlanPacket();
|
|
659
|
+
const prompt = buildReviewPrompt(packet);
|
|
660
|
+
|
|
661
|
+
expect(prompt).not.toContain('Respond with:\n- APPROVE, REJECT, or CONDITIONAL');
|
|
662
|
+
expect(prompt).not.toContain('Confidence score (0-1)');
|
|
663
|
+
});
|
|
664
|
+
});
|
|
665
|
+
|
|
666
|
+
// ─── v2.4.1: Plan Content Loading Tests ──────────────────
|
|
667
|
+
|
|
668
|
+
describe('loadPlanContent', () => {
|
|
669
|
+
function makeTempDir(): string {
|
|
670
|
+
return fs.mkdtempSync(path.join(os.tmpdir(), 'consensus-test-'));
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
it('loads content from valid path', () => {
|
|
674
|
+
const dir = makeTempDir();
|
|
675
|
+
const planPath = 'docs/master_plan.md';
|
|
676
|
+
fs.mkdirSync(path.join(dir, 'docs'), { recursive: true });
|
|
677
|
+
fs.writeFileSync(path.join(dir, planPath), '# My Plan\nDetails here.');
|
|
678
|
+
|
|
679
|
+
const result = loadPlanContent(dir, planPath);
|
|
680
|
+
expect(result.content).toContain('# My Plan');
|
|
681
|
+
expect(result.content).toContain('Details here.');
|
|
682
|
+
expect(result.truncated).toBe(false);
|
|
683
|
+
|
|
684
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
685
|
+
});
|
|
686
|
+
|
|
687
|
+
it('returns empty for missing file', () => {
|
|
688
|
+
const dir = makeTempDir();
|
|
689
|
+
const result = loadPlanContent(dir, 'docs/nonexistent.md');
|
|
690
|
+
expect(result.content).toBe('');
|
|
691
|
+
expect(result.truncated).toBe(false);
|
|
692
|
+
|
|
693
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
694
|
+
});
|
|
695
|
+
|
|
696
|
+
it('blocks path traversal (../../etc/passwd)', () => {
|
|
697
|
+
const dir = makeTempDir();
|
|
698
|
+
const result = loadPlanContent(dir, '../../etc/passwd');
|
|
699
|
+
expect(result.content).toBe('');
|
|
700
|
+
expect(result.truncated).toBe(false);
|
|
701
|
+
|
|
702
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
703
|
+
});
|
|
704
|
+
|
|
705
|
+
it('blocks path traversal (absolute path escape)', () => {
|
|
706
|
+
const dir = makeTempDir();
|
|
707
|
+
// Even if the attacker uses a relative path that resolves outside
|
|
708
|
+
const result = loadPlanContent(dir, '../../../tmp/evil.txt');
|
|
709
|
+
expect(result.content).toBe('');
|
|
710
|
+
|
|
711
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
712
|
+
});
|
|
713
|
+
|
|
714
|
+
it('truncates content exceeding 50K chars', () => {
|
|
715
|
+
const dir = makeTempDir();
|
|
716
|
+
const planPath = 'plan.md';
|
|
717
|
+
// Create 60K content
|
|
718
|
+
const bigContent = 'A'.repeat(60_000);
|
|
719
|
+
fs.writeFileSync(path.join(dir, planPath), bigContent);
|
|
720
|
+
|
|
721
|
+
const result = loadPlanContent(dir, planPath);
|
|
722
|
+
expect(result.truncated).toBe(true);
|
|
723
|
+
expect(result.content).toContain('[TRUNCATED');
|
|
724
|
+
// Content should be capped around 50K + truncation marker
|
|
725
|
+
expect(result.content.length).toBeLessThan(60_000);
|
|
726
|
+
|
|
727
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
728
|
+
});
|
|
729
|
+
|
|
730
|
+
it('returns empty when artifactPath is undefined', () => {
|
|
731
|
+
const result = loadPlanContent('/tmp/test', undefined);
|
|
732
|
+
expect(result.content).toBe('');
|
|
733
|
+
expect(result.truncated).toBe(false);
|
|
734
|
+
});
|
|
735
|
+
});
|
|
736
|
+
|
|
737
|
+
// ─── v2.4.1: Plan Content in Prompt Tests ────────────────
|
|
738
|
+
|
|
739
|
+
describe('buildReviewPrompt — plan content', () => {
|
|
740
|
+
it('includes plan content when provided', () => {
|
|
741
|
+
const packet = makePlanPacket();
|
|
742
|
+
const prompt = buildReviewPrompt(packet, '# Master Plan\nBuild the API.');
|
|
743
|
+
|
|
744
|
+
expect(prompt).toContain('## Plan Content');
|
|
745
|
+
expect(prompt).toContain('# Master Plan');
|
|
746
|
+
expect(prompt).toContain('Build the API.');
|
|
747
|
+
});
|
|
748
|
+
|
|
749
|
+
it('shows warning when plan content is empty', () => {
|
|
750
|
+
const packet = makePlanPacket();
|
|
751
|
+
const prompt = buildReviewPrompt(packet, '');
|
|
752
|
+
|
|
753
|
+
expect(prompt).toContain('## Plan Content');
|
|
754
|
+
expect(prompt).toContain('[WARNING: Plan content could not be loaded');
|
|
755
|
+
});
|
|
756
|
+
|
|
757
|
+
it('shows warning when plan content is undefined (backward compat)', () => {
|
|
758
|
+
const packet = makePlanPacket();
|
|
759
|
+
const prompt = buildReviewPrompt(packet);
|
|
760
|
+
|
|
761
|
+
expect(prompt).toContain('## Plan Content');
|
|
762
|
+
expect(prompt).toContain('[WARNING: Plan content could not be loaded');
|
|
763
|
+
});
|
|
764
|
+
});
|
|
765
|
+
|
|
766
|
+
// ─── v2.4.1: Symmetric Confidence Contradiction Correction ─
|
|
767
|
+
|
|
768
|
+
describe('correctConfidenceContradiction', () => {
|
|
769
|
+
it('corrects REJECT + 0.99 -> min(0.79, 0.01) = 0.01', () => {
|
|
770
|
+
const r = correctConfidenceContradiction('REJECT', 0.99);
|
|
771
|
+
expect(r.confidence).toBeCloseTo(0.01, 5);
|
|
772
|
+
expect(r.wasContradiction).toBe(true);
|
|
773
|
+
expect(r.original).toBeCloseTo(0.99, 5);
|
|
774
|
+
});
|
|
775
|
+
|
|
776
|
+
it('corrects REJECT + 0.85 -> min(0.79, 0.15) = 0.15', () => {
|
|
777
|
+
const r = correctConfidenceContradiction('REJECT', 0.85);
|
|
778
|
+
expect(r.confidence).toBeCloseTo(0.15, 5);
|
|
779
|
+
expect(r.wasContradiction).toBe(true);
|
|
780
|
+
});
|
|
781
|
+
|
|
782
|
+
it('does NOT correct REJECT + 0.50 (already in range)', () => {
|
|
783
|
+
const r = correctConfidenceContradiction('REJECT', 0.50);
|
|
784
|
+
expect(r.confidence).toBeCloseTo(0.50, 5);
|
|
785
|
+
expect(r.wasContradiction).toBe(false);
|
|
786
|
+
});
|
|
787
|
+
|
|
788
|
+
it('corrects CONDITIONAL + 0.98 -> snap to 0.87', () => {
|
|
789
|
+
const r = correctConfidenceContradiction('CONDITIONAL', 0.98);
|
|
790
|
+
expect(r.confidence).toBeCloseTo(0.87, 5);
|
|
791
|
+
expect(r.wasContradiction).toBe(true);
|
|
792
|
+
});
|
|
793
|
+
|
|
794
|
+
it('corrects CONDITIONAL + 0.60 -> snap to 0.87', () => {
|
|
795
|
+
const r = correctConfidenceContradiction('CONDITIONAL', 0.60);
|
|
796
|
+
expect(r.confidence).toBeCloseTo(0.87, 5);
|
|
797
|
+
expect(r.wasContradiction).toBe(true);
|
|
798
|
+
});
|
|
799
|
+
|
|
800
|
+
it('does NOT correct CONDITIONAL + 0.88 (already in range)', () => {
|
|
801
|
+
const r = correctConfidenceContradiction('CONDITIONAL', 0.88);
|
|
802
|
+
expect(r.confidence).toBeCloseTo(0.88, 5);
|
|
803
|
+
expect(r.wasContradiction).toBe(false);
|
|
804
|
+
});
|
|
805
|
+
|
|
806
|
+
it('corrects APPROVE + 0.40 -> max(0.95, 0.60) = 0.95', () => {
|
|
807
|
+
const r = correctConfidenceContradiction('APPROVE', 0.40);
|
|
808
|
+
expect(r.confidence).toBeCloseTo(0.95, 5);
|
|
809
|
+
expect(r.wasContradiction).toBe(true);
|
|
810
|
+
});
|
|
811
|
+
|
|
812
|
+
it('corrects APPROVE + 0.02 -> max(0.95, 0.98) = 0.98', () => {
|
|
813
|
+
const r = correctConfidenceContradiction('APPROVE', 0.02);
|
|
814
|
+
expect(r.confidence).toBeCloseTo(0.98, 5);
|
|
815
|
+
expect(r.wasContradiction).toBe(true);
|
|
816
|
+
});
|
|
817
|
+
|
|
818
|
+
it('corrects APPROVE + 0.88 -> snap to 0.95', () => {
|
|
819
|
+
const r = correctConfidenceContradiction('APPROVE', 0.88);
|
|
820
|
+
expect(r.confidence).toBeCloseTo(0.95, 5);
|
|
821
|
+
expect(r.wasContradiction).toBe(true);
|
|
822
|
+
});
|
|
823
|
+
|
|
824
|
+
it('does NOT correct APPROVE + 0.96 (already in range)', () => {
|
|
825
|
+
const r = correctConfidenceContradiction('APPROVE', 0.96);
|
|
826
|
+
expect(r.confidence).toBeCloseTo(0.96, 5);
|
|
827
|
+
expect(r.wasContradiction).toBe(false);
|
|
828
|
+
});
|
|
829
|
+
|
|
830
|
+
it('does NOT correct when modelVote is null', () => {
|
|
831
|
+
const r = correctConfidenceContradiction(null, 0.99);
|
|
832
|
+
expect(r.confidence).toBeCloseTo(0.99, 5);
|
|
833
|
+
expect(r.wasContradiction).toBe(false);
|
|
834
|
+
});
|
|
835
|
+
});
|
|
836
|
+
|
|
837
|
+
// ─── v2.4.1: Governance Preservation Tests ────────────────
|
|
838
|
+
|
|
839
|
+
describe('governance preservation (correctConfidenceContradiction + mapVote)', () => {
|
|
840
|
+
it('corrected REJECT derives REJECT via mapVote', () => {
|
|
841
|
+
const { confidence } = correctConfidenceContradiction('REJECT', 0.99);
|
|
842
|
+
expect(mapVote(confidence, 0.95)).toBe('REJECT');
|
|
843
|
+
});
|
|
844
|
+
|
|
845
|
+
it('corrected CONDITIONAL derives CONDITIONAL via mapVote', () => {
|
|
846
|
+
const { confidence } = correctConfidenceContradiction('CONDITIONAL', 0.98);
|
|
847
|
+
expect(mapVote(confidence, 0.95)).toBe('CONDITIONAL');
|
|
848
|
+
});
|
|
849
|
+
|
|
850
|
+
it('corrected APPROVE derives APPROVE via mapVote', () => {
|
|
851
|
+
const { confidence } = correctConfidenceContradiction('APPROVE', 0.40);
|
|
852
|
+
expect(mapVote(confidence, 0.95)).toBe('APPROVE');
|
|
853
|
+
});
|
|
854
|
+
});
|
|
855
|
+
|
|
856
|
+
// ─── v2.4.1: Integration (parseRawReviewResponse + correction) ─
|
|
857
|
+
|
|
858
|
+
describe('parseRawReviewResponse — confidence contradiction correction', () => {
|
|
859
|
+
it('corrects JSON response with REJECT + 0.99 and logs warning', () => {
|
|
860
|
+
const raw = JSON.stringify({
|
|
861
|
+
vote: 'REJECT',
|
|
862
|
+
confidence: 0.99,
|
|
863
|
+
blocking_issues: ['[BLOCKER] Missing auth'],
|
|
864
|
+
required_changes: [],
|
|
865
|
+
suggestions: [],
|
|
866
|
+
});
|
|
867
|
+
const result = parseRawReviewResponse(raw);
|
|
868
|
+
|
|
869
|
+
// Confidence should be corrected: min(0.79, 1 - 0.99) = 0.01
|
|
870
|
+
expect(result.confidence).toBeCloseTo(0.01, 5);
|
|
871
|
+
expect(result.modelVote).toBe('REJECT');
|
|
872
|
+
});
|
|
873
|
+
|
|
874
|
+
it('does not alter valid REJECT + 0.45', () => {
|
|
875
|
+
const raw = JSON.stringify({
|
|
876
|
+
vote: 'REJECT',
|
|
877
|
+
confidence: 0.45,
|
|
878
|
+
blocking_issues: ['[BLOCKER] Missing auth'],
|
|
879
|
+
required_changes: [],
|
|
880
|
+
suggestions: [],
|
|
881
|
+
});
|
|
882
|
+
const result = parseRawReviewResponse(raw);
|
|
883
|
+
|
|
884
|
+
expect(result.confidence).toBeCloseTo(0.45, 5);
|
|
885
|
+
});
|
|
886
|
+
|
|
887
|
+
it('corrects JSON response with APPROVE + 0.40', () => {
|
|
888
|
+
const raw = JSON.stringify({
|
|
889
|
+
vote: 'APPROVE',
|
|
890
|
+
confidence: 0.40,
|
|
891
|
+
blocking_issues: [],
|
|
892
|
+
required_changes: [],
|
|
893
|
+
suggestions: [],
|
|
894
|
+
});
|
|
895
|
+
const result = parseRawReviewResponse(raw);
|
|
896
|
+
|
|
897
|
+
// Corrected: max(0.95, 1 - 0.40) = max(0.95, 0.60) = 0.95
|
|
898
|
+
expect(result.confidence).toBeCloseTo(0.95, 5);
|
|
899
|
+
});
|
|
900
|
+
});
|
|
901
|
+
|
|
902
|
+
// ─── v2.4.1: Prompt Wording Tests ────────────────────────
|
|
903
|
+
|
|
904
|
+
describe('buildReviewPrompt — confidence semantics wording', () => {
|
|
905
|
+
it('should state confidence is plan quality not review certainty', () => {
|
|
906
|
+
const packet = makePlanPacket();
|
|
907
|
+
const prompt = buildReviewPrompt(packet);
|
|
908
|
+
|
|
909
|
+
expect(prompt).toContain('PLAN QUALITY');
|
|
910
|
+
expect(prompt).toContain('NOT how certain you are');
|
|
911
|
+
});
|
|
912
|
+
|
|
913
|
+
it('should warn that mismatches will be auto-corrected', () => {
|
|
914
|
+
const packet = makePlanPacket();
|
|
915
|
+
const prompt = buildReviewPrompt(packet);
|
|
916
|
+
|
|
917
|
+
expect(prompt).toContain('auto-corrected');
|
|
918
|
+
expect(prompt).toContain('Mismatched vote+confidence');
|
|
919
|
+
});
|
|
920
|
+
|
|
921
|
+
it('should include valid/invalid response examples', () => {
|
|
922
|
+
const packet = makePlanPacket();
|
|
923
|
+
const prompt = buildReviewPrompt(packet);
|
|
924
|
+
|
|
925
|
+
expect(prompt).toContain('Examples of VALID responses');
|
|
926
|
+
expect(prompt).toContain('Examples of INVALID responses');
|
|
927
|
+
expect(prompt).toContain('REJECT with confidence 0.99');
|
|
928
|
+
expect(prompt).toContain('APPROVE with confidence 0.60');
|
|
929
|
+
});
|
|
930
|
+
});
|
|
931
|
+
|
|
932
|
+
// ─── v2.4.1: Arbitrator Governance Tests ──────────────────
|
|
933
|
+
|
|
934
|
+
describe('arbitrator governance (confidence-only derivation)', () => {
|
|
935
|
+
it('arbitrator approval derived from confidence only, not modelVote', () => {
|
|
936
|
+
const rawLowConf = JSON.stringify({
|
|
937
|
+
vote: 'APPROVE',
|
|
938
|
+
confidence: 0.85,
|
|
939
|
+
blocking_issues: [],
|
|
940
|
+
required_changes: [],
|
|
941
|
+
suggestions: [],
|
|
942
|
+
});
|
|
943
|
+
const parsedLow = parseRawReviewResponse(rawLowConf);
|
|
944
|
+
// APPROVE + 0.85 -> corrected to 0.95, so approved = true
|
|
945
|
+
const approvedCorrected = parsedLow.confidence >= 0.90;
|
|
946
|
+
expect(approvedCorrected).toBe(true);
|
|
947
|
+
|
|
948
|
+
// Now test: REJECT + 0.50 -> no correction, confidence stays 0.50
|
|
949
|
+
const rawReject = JSON.stringify({
|
|
950
|
+
vote: 'REJECT',
|
|
951
|
+
confidence: 0.50,
|
|
952
|
+
blocking_issues: ['[BLOCKER] Bad plan'],
|
|
953
|
+
required_changes: [],
|
|
954
|
+
suggestions: [],
|
|
955
|
+
});
|
|
956
|
+
const parsedReject = parseRawReviewResponse(rawReject);
|
|
957
|
+
const approvedReject = parsedReject.confidence >= 0.90;
|
|
958
|
+
expect(approvedReject).toBe(false);
|
|
959
|
+
});
|
|
960
|
+
});
|
|
961
|
+
|
|
962
|
+
// ─── v2.4.2: Version-keyed Arbitration Tests ──────────────
|
|
963
|
+
|
|
964
|
+
describe('version-keyed arbitration (v2.4.2)', () => {
|
|
965
|
+
it('same phase + new version allows retry', () => {
|
|
966
|
+
const runner = new ConsensusRunner({
|
|
967
|
+
mode: 'independent',
|
|
968
|
+
minReviewers: 2,
|
|
969
|
+
threshold: 0.95,
|
|
970
|
+
quorum: 2,
|
|
971
|
+
projectDir: '/tmp/test',
|
|
972
|
+
enableArbitration: true,
|
|
973
|
+
arbitratorProvider: { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.2 },
|
|
974
|
+
});
|
|
975
|
+
|
|
976
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
977
|
+
const attempted = (runner as any).arbitrationAttempted as Set<string>;
|
|
978
|
+
attempted.add('CONSENSUS_MASTER_PLAN@v1');
|
|
979
|
+
|
|
980
|
+
// v2 should NOT be blocked
|
|
981
|
+
expect(attempted.has('CONSENSUS_MASTER_PLAN@v2')).toBe(false);
|
|
982
|
+
});
|
|
983
|
+
|
|
984
|
+
it('same phase + same version blocks retry', () => {
|
|
985
|
+
const runner = new ConsensusRunner({
|
|
986
|
+
mode: 'independent',
|
|
987
|
+
minReviewers: 2,
|
|
988
|
+
threshold: 0.95,
|
|
989
|
+
quorum: 2,
|
|
990
|
+
projectDir: '/tmp/test',
|
|
991
|
+
enableArbitration: true,
|
|
992
|
+
arbitratorProvider: { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.2 },
|
|
993
|
+
});
|
|
994
|
+
|
|
995
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
996
|
+
const attempted = (runner as any).arbitrationAttempted as Set<string>;
|
|
997
|
+
attempted.add('CONSENSUS_MASTER_PLAN@v1');
|
|
998
|
+
|
|
999
|
+
expect(attempted.has('CONSENSUS_MASTER_PLAN@v1')).toBe(true);
|
|
1000
|
+
});
|
|
1001
|
+
});
|
|
1002
|
+
|
|
1003
|
+
// ─── v2.4.2: Revision Directive Tests ──────────────────────
|
|
1004
|
+
|
|
1005
|
+
describe('revisionDirective in review prompt (v2.4.2)', () => {
|
|
1006
|
+
it('revisionDirective is rendered in review prompt when provided', () => {
|
|
1007
|
+
const packet = makePlanPacket({
|
|
1008
|
+
metadata: {
|
|
1009
|
+
packet_id: 'plan-2',
|
|
1010
|
+
timestamp: new Date().toISOString(),
|
|
1011
|
+
phase: 'CONSENSUS_MASTER_PLAN',
|
|
1012
|
+
submitted_by: 'DISPATCHER',
|
|
1013
|
+
version: 2,
|
|
1014
|
+
},
|
|
1015
|
+
});
|
|
1016
|
+
const directive = 'Fix the authentication flow and add rate limiting';
|
|
1017
|
+
const prompt = buildReviewPrompt(packet, '# Plan', directive);
|
|
1018
|
+
|
|
1019
|
+
expect(prompt).toContain('Prior Feedback (Must Address)');
|
|
1020
|
+
expect(prompt).toContain('Fix the authentication flow');
|
|
1021
|
+
expect(prompt).toContain('Confirm each item above is addressed');
|
|
1022
|
+
});
|
|
1023
|
+
|
|
1024
|
+
it('revisionDirective is NOT rendered when undefined (backward compat)', () => {
|
|
1025
|
+
const packet = makePlanPacket();
|
|
1026
|
+
const prompt = buildReviewPrompt(packet, '# Plan');
|
|
1027
|
+
|
|
1028
|
+
expect(prompt).not.toContain('Prior Feedback (Must Address)');
|
|
1029
|
+
});
|
|
1030
|
+
|
|
1031
|
+
it('revisionDirective is NOT rendered when empty string', () => {
|
|
1032
|
+
const packet = makePlanPacket({
|
|
1033
|
+
metadata: {
|
|
1034
|
+
packet_id: 'plan-2',
|
|
1035
|
+
timestamp: new Date().toISOString(),
|
|
1036
|
+
phase: 'CONSENSUS_MASTER_PLAN',
|
|
1037
|
+
submitted_by: 'DISPATCHER',
|
|
1038
|
+
version: 2,
|
|
1039
|
+
},
|
|
1040
|
+
});
|
|
1041
|
+
const prompt = buildReviewPrompt(packet, '# Plan', ' ');
|
|
1042
|
+
|
|
1043
|
+
expect(prompt).not.toContain('Prior Feedback (Must Address)');
|
|
1044
|
+
});
|
|
1045
|
+
|
|
1046
|
+
it('revisionDirective is truncated at 2000 chars', () => {
|
|
1047
|
+
const packet = makePlanPacket({
|
|
1048
|
+
metadata: {
|
|
1049
|
+
packet_id: 'plan-2',
|
|
1050
|
+
timestamp: new Date().toISOString(),
|
|
1051
|
+
phase: 'CONSENSUS_MASTER_PLAN',
|
|
1052
|
+
submitted_by: 'DISPATCHER',
|
|
1053
|
+
version: 2,
|
|
1054
|
+
},
|
|
1055
|
+
});
|
|
1056
|
+
const longDirective = 'A'.repeat(3000);
|
|
1057
|
+
const prompt = buildReviewPrompt(packet, '# Plan', longDirective);
|
|
1058
|
+
|
|
1059
|
+
expect(prompt).toContain('Prior Feedback (Must Address)');
|
|
1060
|
+
expect(prompt).toContain('[TRUNCATED');
|
|
1061
|
+
// Should not contain full 3000 chars of content
|
|
1062
|
+
const directiveSection = prompt.split('Prior Feedback (Must Address)')[1];
|
|
1063
|
+
expect(directiveSection.indexOf('A'.repeat(2001))).toBe(-1);
|
|
1064
|
+
});
|
|
1065
|
+
|
|
1066
|
+
it('revision notice appears in prompt when version > 1 and mentions "prior issues"', () => {
|
|
1067
|
+
const packet = makePlanPacket({
|
|
1068
|
+
metadata: {
|
|
1069
|
+
packet_id: 'plan-3',
|
|
1070
|
+
timestamp: new Date().toISOString(),
|
|
1071
|
+
phase: 'CONSENSUS_MASTER_PLAN',
|
|
1072
|
+
submitted_by: 'DISPATCHER',
|
|
1073
|
+
version: 3,
|
|
1074
|
+
},
|
|
1075
|
+
});
|
|
1076
|
+
const prompt = buildReviewPrompt(packet, '# Plan');
|
|
1077
|
+
|
|
1078
|
+
expect(prompt).toContain('Revision Notice');
|
|
1079
|
+
expect(prompt).toContain('revision 3');
|
|
1080
|
+
expect(prompt).toContain('prior issues');
|
|
1081
|
+
});
|
|
1082
|
+
|
|
1083
|
+
it('revision notice does NOT appear when version = 1', () => {
|
|
1084
|
+
const packet = makePlanPacket();
|
|
1085
|
+
const prompt = buildReviewPrompt(packet);
|
|
1086
|
+
|
|
1087
|
+
expect(prompt).not.toContain('Revision Notice');
|
|
1088
|
+
});
|
|
1089
|
+
});
|
|
1090
|
+
|
|
1091
|
+
// ─── v2.4.2: getArbitrationTrigger Tests ──────────────────
|
|
1092
|
+
|
|
1093
|
+
describe('getArbitrationTrigger (v2.4.2)', () => {
|
|
1094
|
+
it('returns DISAGREEMENT when votes have mixed APPROVE/REJECT', () => {
|
|
1095
|
+
const votes = [
|
|
1096
|
+
makeVote('r1', 'APPROVE', 0.96),
|
|
1097
|
+
makeVote('r2', 'REJECT', 0.5),
|
|
1098
|
+
];
|
|
1099
|
+
expect(getArbitrationTrigger(votes, 0.48, 0.95)).toBe('DISAGREEMENT');
|
|
1100
|
+
});
|
|
1101
|
+
|
|
1102
|
+
it('returns BORDERLINE_SCORE when weighted_score within 0.10 of threshold', () => {
|
|
1103
|
+
// All same vote (no disagreement), but score within 0.10 of threshold
|
|
1104
|
+
const votes = [
|
|
1105
|
+
makeVote('r1', 'CONDITIONAL', 0.90),
|
|
1106
|
+
makeVote('r2', 'CONDITIONAL', 0.88),
|
|
1107
|
+
];
|
|
1108
|
+
// weighted_score 0.89, threshold 0.95, 0.89 >= 0.85 -> BORDERLINE
|
|
1109
|
+
expect(getArbitrationTrigger(votes, 0.89, 0.95)).toBe('BORDERLINE_SCORE');
|
|
1110
|
+
});
|
|
1111
|
+
|
|
1112
|
+
it('returns ALL_CONDITIONAL when all votes conditional with high confidence', () => {
|
|
1113
|
+
const votes: ReviewerVote[] = [
|
|
1114
|
+
{ ...makeVote('r1', 'CONDITIONAL', 0.94), blocking_issues: [], required_changes: ['Fix A'] },
|
|
1115
|
+
{ ...makeVote('r2', 'CONDITIONAL', 0.95), blocking_issues: [], required_changes: ['Fix B'] },
|
|
1116
|
+
];
|
|
1117
|
+
// Not DISAGREEMENT (all same), not BORDERLINE (0.44 < 0.85)
|
|
1118
|
+
expect(getArbitrationTrigger(votes, 0.44, 0.95)).toBe('ALL_CONDITIONAL');
|
|
1119
|
+
});
|
|
1120
|
+
|
|
1121
|
+
it('returns NONE when no trigger conditions met', () => {
|
|
1122
|
+
const votes = [
|
|
1123
|
+
makeVote('r1', 'REJECT', 0.3),
|
|
1124
|
+
makeVote('r2', 'REJECT', 0.4),
|
|
1125
|
+
];
|
|
1126
|
+
// Unanimous REJECT, score 0.0, threshold 0.95 => no trigger
|
|
1127
|
+
expect(getArbitrationTrigger(votes, 0.0, 0.95)).toBe('NONE');
|
|
1128
|
+
});
|
|
1129
|
+
|
|
1130
|
+
it('DISAGREEMENT takes priority over BORDERLINE_SCORE', () => {
|
|
1131
|
+
const votes = [
|
|
1132
|
+
makeVote('r1', 'APPROVE', 0.96),
|
|
1133
|
+
makeVote('r2', 'REJECT', 0.5),
|
|
1134
|
+
];
|
|
1135
|
+
// weighted_score 0.48 is also borderline of 0.55 threshold, but DISAGREEMENT fires first
|
|
1136
|
+
expect(getArbitrationTrigger(votes, 0.48, 0.55)).toBe('DISAGREEMENT');
|
|
1137
|
+
});
|
|
1138
|
+
});
|
|
1139
|
+
|
|
1140
|
+
// ─── v2.4.2: Arbitrator Rotation Tests ────────────────────
|
|
1141
|
+
|
|
1142
|
+
describe('arbitrator rotation (v2.4.2)', () => {
|
|
1143
|
+
it('arbitrator rotates to OpenAI when default Gemini is a dissenter', () => {
|
|
1144
|
+
const runner = new ConsensusRunner({
|
|
1145
|
+
mode: 'independent',
|
|
1146
|
+
minReviewers: 2,
|
|
1147
|
+
threshold: 0.95,
|
|
1148
|
+
quorum: 2,
|
|
1149
|
+
projectDir: '/tmp/test',
|
|
1150
|
+
enableArbitration: true,
|
|
1151
|
+
arbitratorProvider: { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.2 },
|
|
1152
|
+
reviewerProviders: [
|
|
1153
|
+
{ provider: 'openai', model: 'gpt-4.1', temperature: 0.3 },
|
|
1154
|
+
{ provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.3 },
|
|
1155
|
+
],
|
|
1156
|
+
});
|
|
1157
|
+
|
|
1158
|
+
// Access private callArbitrator via constructing the scenario:
|
|
1159
|
+
// We verify the rotation logic by checking the internal state
|
|
1160
|
+
// The rotation happens inside callArbitrator, which we can't easily unit-test
|
|
1161
|
+
// without mocking the adapter. Instead, verify the runner is properly configured.
|
|
1162
|
+
expect(runner).toBeDefined();
|
|
1163
|
+
|
|
1164
|
+
// Verify the rotation logic directly via the dissenter detection:
|
|
1165
|
+
const votes = [
|
|
1166
|
+
makeVote('r1', 'APPROVE', 0.96),
|
|
1167
|
+
{ ...makeVote('r2', 'REJECT', 0.5), provider: 'gemini' },
|
|
1168
|
+
];
|
|
1169
|
+
const dissentingProviders = new Set(
|
|
1170
|
+
votes.filter(v => v.vote === 'REJECT').map(v => v.provider),
|
|
1171
|
+
);
|
|
1172
|
+
// Default arbitrator (gemini) IS a dissenter
|
|
1173
|
+
expect(dissentingProviders.has('gemini')).toBe(true);
|
|
1174
|
+
|
|
1175
|
+
// Rotation should pick openai (first in fallback order that's configured & not dissenting)
|
|
1176
|
+
const configuredProviders = new Set(['openai', 'gemini']);
|
|
1177
|
+
const ARBITRATOR_FALLBACK_ORDER = ['openai', 'grok', 'gemini'];
|
|
1178
|
+
const alternate = ARBITRATOR_FALLBACK_ORDER.find(
|
|
1179
|
+
p => !dissentingProviders.has(p) && configuredProviders.has(p),
|
|
1180
|
+
);
|
|
1181
|
+
expect(alternate).toBe('openai');
|
|
1182
|
+
});
|
|
1183
|
+
|
|
1184
|
+
it('arbitrator keeps default when default is NOT a dissenter', () => {
  // Two votes attributed to openai: one approval and one rejection.
  const approval = { ...makeVote('r1', 'APPROVE', 0.96), provider: 'openai' };
  const rejection = { ...makeVote('r2', 'REJECT', 0.5), provider: 'openai' };

  // Gather the provider behind every REJECT vote.
  const rejectingProviders = new Set<string>();
  for (const ballot of [approval, rejection]) {
    if (ballot.vote === 'REJECT') {
      rejectingProviders.add(ballot.provider);
    }
  }

  // The default arbitrator (gemini) cast no REJECT vote, so it is not a dissenter.
  expect(rejectingProviders.has('gemini')).toBe(false);
});
|
|
1195
|
+
});
|
|
1196
|
+
|
|
1197
|
+
// ─── v2.4.2: Escalation Tests ─────────────────────────────
|
|
1198
|
+
|
|
1199
|
+
/**
 * Escalation rules (v2.4.2), exercised against a local re-statement of the
 * selection logic rather than the runner itself.
 */
describe('escalation (v2.4.2)', () => {
  it('escalation would add 3rd reviewer at version >= 3 when only 2 providers', () => {
    // Test the escalation logic: at v3+ with 2 providers, add a 3rd
    const providers = [
      { provider: 'openai', model: 'gpt-4.1', temperature: 0.3 },
      { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.3 },
    ];
    const version = 3;

    // Assert the escalation preconditions instead of branching on them: an
    // `expect` nested inside an `if` is skipped silently when the fixture
    // changes, leaving a test that passes without asserting anything.
    expect(version).toBeGreaterThanOrEqual(3);
    expect(providers.length).toBeLessThan(3);

    const existingNames = new Set(providers.map(p => p.provider));
    const candidates = new Set(['openai', 'gemini', 'grok']); // simulated config
    const PREFERRED_ORDER = ['grok', 'openai', 'gemini'];

    // First preferred provider that is configured but not yet reviewing.
    const tieBreaker = PREFERRED_ORDER.find(p => candidates.has(p) && !existingNames.has(p));

    expect(tieBreaker).toBe('grok');
  });

  it('escalation does not add reviewer when already 3+ providers', () => {
    const providers = [
      { provider: 'openai', model: 'gpt-4.1', temperature: 0.3 },
      { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.3 },
      { provider: 'grok', model: 'grok-3', temperature: 0.3 },
    ];
    // With 3 providers already, no escalation needed
    // NOTE(review): this only checks the fixture's length; it does not run
    // any escalation code.
    expect(providers.length >= 3).toBe(true);
  });

  it('escalation does not add reviewer at version < 3', () => {
    // NOTE(review): this only restates the version guard; it does not run
    // any escalation code.
    const version = 2;
    expect(version >= 3).toBe(false);
  });
});
|
|
1233
|
+
|
|
1234
|
+
// ─── v2.4.2: No Forced-Approval Tests ─────────────────────
|
|
1235
|
+
|
|
1236
|
+
/** Governance check (v2.4.2): a unanimous rejection stays rejected. */
describe('no forced-approval (v2.4.2)', () => {
  it('high version still returns honest REJECTED status (no escape hatch)', () => {
    // Both reviewers reject.
    const rejections = [
      makeVote('r1', 'REJECT', 0.3),
      makeVote('r2', 'REJECT', 0.4),
    ];
    const rules = { threshold: 0.95, quorum: 2, min_reviewers: 2 };

    const { final_status, consensus_result } = buildConsensusPacket({
      planPacketRef: makeRef(),
      votes: rejections,
      rules,
    });

    // Governance is never bypassed, even at high iterations.
    expect(final_status).toBe('REJECTED');
    expect(consensus_result.approved).toBe(false);
  });
});
|
|
1253
|
+
|
|
1254
|
+
// ─── v2.4.3: parseArbitratorResponse Tests ─────────────────
|
|
1255
|
+
|
|
1256
|
+
/**
 * parseArbitratorResponse (v2.4.3): fenced JSON, bare JSON, free-form text,
 * unparseable input, and payloads carrying both spellings of the
 * suggested-changes field.
 */
describe('parseArbitratorResponse (v2.4.3)', () => {
  it('parses JSON in code fence with valid schema -> approved: true', () => {
    const payload = {
      approved: true,
      reasoning: 'Plan is solid with minor amendments',
      suggestedChanges: ['Add error handling to /api/users'],
    };
    // Wrap the serialized payload in a ```json fence.
    const fenced = ['```json', JSON.stringify(payload), '```'].join('\n');

    const { approved, reasoning, suggestedChanges } = parseArbitratorResponse(fenced);

    expect(approved).toBe(true);
    expect(reasoning).toBe('Plan is solid with minor amendments');
    expect(suggestedChanges).toEqual(['Add error handling to /api/users']);
  });

  it('parses plain JSON without code fence -> approved: true', () => {
    const payload = {
      approved: true,
      reasoning: 'Acceptable plan',
      suggestedChanges: [],
    };

    const { approved, reasoning, suggestedChanges } = parseArbitratorResponse(
      JSON.stringify(payload),
    );

    expect(approved).toBe(true);
    expect(reasoning).toBe('Acceptable plan');
    expect(suggestedChanges).toEqual([]);
  });

  it('parses free-form text "approved: true" -> approved: true', () => {
    const prose =
      'After reviewing the plan, I determine approved: true. The plan addresses all major concerns.';

    expect(parseArbitratorResponse(prose).approved).toBe(true);
  });

  it('parses free-form text "APPROVE" -> approved: true, "REJECT" -> approved: false', () => {
    const approving = parseArbitratorResponse(
      'I APPROVE this plan based on the evidence presented.',
    );
    expect(approving.approved).toBe(true);

    const rejecting = parseArbitratorResponse(
      'I must REJECT this plan due to fundamental issues.',
    );
    expect(rejecting.approved).toBe(false);
  });

  it('garbage text -> approved: false (safe default)', () => {
    const noise = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.';

    const { approved, reasoning } = parseArbitratorResponse(noise);

    expect(approved).toBe(false);
    // The expected reasoning is the raw input, capped at 2000 characters.
    expect(reasoning).toBe(noise.slice(0, 2000));
  });

  it('schema with both suggestedChanges and suggested_changes -> merged, no error', () => {
    const payload = {
      approved: true,
      reasoning: 'Good plan',
      suggestedChanges: ['Fix A'],
      suggested_changes: ['Fix B'],
    };

    const { approved, suggestedChanges } = parseArbitratorResponse(JSON.stringify(payload));

    expect(approved).toBe(true);
    expect(suggestedChanges).toContain('Fix A');
    expect(suggestedChanges).toContain('Fix B');
    expect(suggestedChanges).toHaveLength(2);
  });
});
|
|
1321
|
+
|
|
1322
|
+
// ─── v2.4.3: Gate ARBITRATED Status Tests ───────────────────
|
|
1323
|
+
|
|
1324
|
+
/**
 * Gate behaviour around the ARBITRATED final status (v2.4.3): the gate
 * engine's verdict for ARBITRATED vs REJECTED, plus a local simulation of
 * the orchestrator's gate-result merge.
 */
describe('gate engine respects ARBITRATED (v2.4.3)', () => {
  /** Imports the gate-engine module lazily and returns a new engine. */
  const loadGateEngine = async () => {
    const gateEngineModule = await import('../../src/pipeline/gate-engine.js');
    return gateEngineModule.createGateEngine();
  };

  /**
   * Pipeline fixture holding one consensus artifact and a stored
   * CONSENSUS_MASTER_PLAN gate result scored below the 0.95 threshold.
   */
  const pipelineWithStoredResult = (pass: boolean, finalStatus: string) => ({
    pipelinePhase: 'CONSENSUS_MASTER_PLAN' as const,
    artifacts: [
      {
        id: 'c1',
        type: 'consensus',
        phase: 'CONSENSUS_MASTER_PLAN',
        path: '',
        sha256: '',
        version: 1,
        content_type: 'json',
        timestamp: new Date().toISOString(),
      },
    ],
    gateResults: {
      CONSENSUS_MASTER_PLAN: {
        phase: 'CONSENSUS_MASTER_PLAN' as const,
        pass,
        score: 0.60,
        blockers: [],
        missingArtifacts: [],
        failedChecks: [],
        consensusScore: 0.50,
        finalStatus,
        timestamp: new Date().toISOString(),
      },
    },
    gateChecks: {},
    recoveryCount: 0,
    maxRecoveryIterations: 6,
    skillUsageEvents: [],
    latestRepoSnapshot: null,
  });

  it('gate passes when finalStatus=ARBITRATED even with score below threshold', async () => {
    // Simulate a phase handler that stored ARBITRATED alongside a low score.
    const gateEngine = await loadGateEngine();
    const pipeline = pipelineWithStoredResult(true, 'ARBITRATED');

    const verdict = gateEngine.evaluateGate('CONSENSUS_MASTER_PLAN', pipeline as any);

    // ARBITRATED is expected to override the score check.
    expect(verdict.pass).toBe(true);
  });

  it('gate fails when finalStatus=REJECTED and score below threshold', async () => {
    const gateEngine = await loadGateEngine();
    const pipeline = pipelineWithStoredResult(false, 'REJECTED');

    const verdict = gateEngine.evaluateGate('CONSENSUS_MASTER_PLAN', pipeline as any);

    // REJECTED with a below-threshold score must block the gate.
    expect(verdict.pass).toBe(false);
    expect(verdict.blockers.some(b => b.includes('below threshold'))).toBe(true);
  });

  it('mergeGateResult preserves finalStatus from phase handler', () => {
    // Result previously stored by the phase handler.
    const pipeline = {
      gateResults: {
        CONSENSUS_MASTER_PLAN: {
          phase: 'CONSENSUS_MASTER_PLAN' as const,
          pass: true,
          score: 0.80,
          blockers: [],
          missingArtifacts: [],
          failedChecks: [],
          consensusScore: 0.50,
          finalStatus: 'ARBITRATED',
          timestamp: '2024-01-01T00:00:00Z',
        },
      },
    } as any;

    // Fresh gate evaluation that carries no score or status fields.
    const incoming = {
      phase: 'CONSENSUS_MASTER_PLAN' as const,
      pass: true,
      blockers: [],
      missingArtifacts: [],
      failedChecks: [],
      timestamp: '2024-01-01T00:00:01Z',
    };

    // Simulated merge: fields already stored win over the incoming ones.
    const previous = pipeline.gateResults['CONSENSUS_MASTER_PLAN'];
    const merged = {
      ...incoming,
      score: previous.score ?? incoming.score,
      consensusScore: previous.consensusScore ?? incoming.consensusScore,
      finalStatus: previous.finalStatus ?? incoming.finalStatus,
    };
    pipeline.gateResults['CONSENSUS_MASTER_PLAN'] = merged;

    const stored = pipeline.gateResults['CONSENSUS_MASTER_PLAN'];
    expect(stored.finalStatus).toBe('ARBITRATED');
    expect(stored.score).toBe(0.80);
  });
});
|
|
1433
|
+
|
|
1434
|
+
// ─── v2.4.4: Version-increment / Arbitration Key Tests ────────
|
|
1435
|
+
|
|
1436
|
+
/**
 * Arbitration keys (v2.4.4): keys have the form `<PHASE>@v<version>` with
 * version = recoveryCount + 1, so a retry is only blocked when the key was
 * already recorded.
 */
describe('version-increment arbitration key (v2.4.4)', () => {
  /** Version used in the key for a given recovery count. */
  const versionFor = (recoveryCount: number) => recoveryCount + 1;

  /** Arbitration key for the architecture consensus phase. */
  const architectureKey = (version: number) => `CONSENSUS_ARCHITECTURE@v${version}`;

  /** Creates an arbitration-enabled runner and exposes its private attempt set. */
  const freshAttemptedSet = (): Set<string> => {
    const runner = new ConsensusRunner({
      mode: 'independent',
      minReviewers: 2,
      threshold: 0.95,
      quorum: 2,
      projectDir: '/tmp/test',
      enableArbitration: true,
      arbitratorProvider: { provider: 'gemini', model: 'gemini-2.5-flash', temperature: 0.2 },
    });
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    return (runner as any).arbitrationAttempted as Set<string>;
  };

  it('recoveryCount=0 -> version=1 -> arbitration key CONSENSUS_ARCHITECTURE@v1', () => {
    const version = versionFor(0);
    expect(version).toBe(1);

    expect(architectureKey(version)).toBe('CONSENSUS_ARCHITECTURE@v1');
  });

  it('recoveryCount=1 -> version=2 -> arbitration key CONSENSUS_ARCHITECTURE@v2', () => {
    const version = versionFor(1);
    expect(version).toBe(2);

    expect(architectureKey(version)).toBe('CONSENSUS_ARCHITECTURE@v2');
  });

  it('arbitrationAttempted Set does NOT block second run with different version', () => {
    const attempted = freshAttemptedSet();

    // First run: recoveryCount=0, version=1
    attempted.add('CONSENSUS_ARCHITECTURE@v1');
    expect(attempted.has('CONSENSUS_ARCHITECTURE@v1')).toBe(true);

    // Second run after recovery (recoveryCount=1, version=2): its key was
    // never recorded, so the Set does not block it.
    expect(attempted.has('CONSENSUS_ARCHITECTURE@v2')).toBe(false);

    // Same for CONSENSUS_ROLE_PLANS
    attempted.add('CONSENSUS_ROLE_PLANS@v1');
    expect(attempted.has('CONSENSUS_ROLE_PLANS@v1')).toBe(true);
    expect(attempted.has('CONSENSUS_ROLE_PLANS@v2')).toBe(false);
  });

  it('version=1 (default) blocks retry when version not incremented', () => {
    const attempted = freshAttemptedSet();

    // If version stays at 1 (bug: missing recoveryCount passthrough),
    // the retry reuses a key that is already recorded and is blocked.
    attempted.add('CONSENSUS_ARCHITECTURE@v1');
    expect(attempted.has('CONSENSUS_ARCHITECTURE@v1')).toBe(true);

    // Bug scenario: version=1 again -> same key -> blocked.
    const retryKey = architectureKey(1);
    expect(attempted.has(retryKey)).toBe(true);
  });
});
|
|
1506
|
+
|
|
1507
|
+
// ─── v2.4.3: Stale Master Plan Test ─────────────────────────
|
|
1508
|
+
|
|
1509
|
+
/**
 * Stale master plan regression (v2.4.3): searching the artifact list from
 * the end yields the newest master_plan, whereas a forward search yields
 * the first (stale) one.
 */
describe('latest master plan artifact (v2.4.3)', () => {
  it('reverse-find picks latest master_plan not stale v1', () => {
    const artifacts = [
      { id: 'mp1', type: 'master_plan', phase: 'INTAKE', version: 1, timestamp: '2024-01-01T00:00:00Z' },
      { id: 'other', type: 'constitution', phase: 'INTAKE', version: 1, timestamp: '2024-01-01T00:00:01Z' },
      { id: 'mp2', type: 'master_plan', phase: 'INTAKE', version: 2, timestamp: '2024-01-02T00:00:00Z' },
    ];
    const isMasterPlan = (artifact: { type: string }) => artifact.type === 'master_plan';

    // Simulates the fix: search a reversed copy so the last entry wins.
    const newestFirst = [...artifacts].reverse();
    const latest = newestFirst.find(isMasterPlan);
    expect(latest).toBeDefined();
    expect(latest?.id).toBe('mp2');
    expect(latest?.version).toBe(2);

    // The old forward .find() would have returned the stale v1 entry.
    const stale = artifacts.find(isMasterPlan);
    expect(stale?.id).toBe('mp1');
    expect(stale?.version).toBe(1);
  });
});
|
|
206
1529
|
});
|