multi-model-debate 1.0.1 (multi_model_debate-1.0.1-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- multi_model_debate/__init__.py +4 -0
- multi_model_debate/__main__.py +6 -0
- multi_model_debate/cli.py +290 -0
- multi_model_debate/config.py +271 -0
- multi_model_debate/exceptions.py +83 -0
- multi_model_debate/models/__init__.py +71 -0
- multi_model_debate/models/claude.py +168 -0
- multi_model_debate/models/cli_wrapper.py +233 -0
- multi_model_debate/models/gemini.py +66 -0
- multi_model_debate/models/openai.py +66 -0
- multi_model_debate/models/protocols.py +35 -0
- multi_model_debate/orchestrator.py +465 -0
- multi_model_debate/phases/__init__.py +22 -0
- multi_model_debate/phases/base.py +236 -0
- multi_model_debate/phases/baseline.py +117 -0
- multi_model_debate/phases/debate.py +154 -0
- multi_model_debate/phases/defense.py +186 -0
- multi_model_debate/phases/final_position.py +307 -0
- multi_model_debate/phases/judge.py +177 -0
- multi_model_debate/phases/synthesis.py +162 -0
- multi_model_debate/pre_debate.py +83 -0
- multi_model_debate/prompts/arbiter_prompt.md.j2 +24 -0
- multi_model_debate/prompts/arbiter_summary.md.j2 +102 -0
- multi_model_debate/prompts/baseline_critique.md.j2 +5 -0
- multi_model_debate/prompts/critic_1_lens.md.j2 +52 -0
- multi_model_debate/prompts/critic_2_lens.md.j2 +52 -0
- multi_model_debate/prompts/debate_round.md.j2 +14 -0
- multi_model_debate/prompts/defense_initial.md.j2 +9 -0
- multi_model_debate/prompts/defense_round.md.j2 +8 -0
- multi_model_debate/prompts/judge.md.j2 +34 -0
- multi_model_debate/prompts/judge_prompt.md.j2 +13 -0
- multi_model_debate/prompts/strategist_proxy_lens.md.j2 +33 -0
- multi_model_debate/prompts/synthesis_prompt.md.j2 +16 -0
- multi_model_debate/prompts/synthesis_template.md.j2 +44 -0
- multi_model_debate/prompts/winner_response.md.j2 +17 -0
- multi_model_debate/response_parser.py +268 -0
- multi_model_debate/roles.py +163 -0
- multi_model_debate/storage/__init__.py +17 -0
- multi_model_debate/storage/run.py +509 -0
- multi_model_debate-1.0.1.dist-info/METADATA +572 -0
- multi_model_debate-1.0.1.dist-info/RECORD +44 -0
- multi_model_debate-1.0.1.dist-info/WHEEL +4 -0
- multi_model_debate-1.0.1.dist-info/entry_points.txt +2 -0
- multi_model_debate-1.0.1.dist-info/licenses/LICENSE +21 -0
multi_model_debate/phases/final_position.py
@@ -0,0 +1,307 @@
"""Phase 6: Strategist generates the Final Position."""

from __future__ import annotations

import re
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING

from rich.console import Console

from multi_model_debate.exceptions import PhaseError
from multi_model_debate.phases.base import Phase, PhaseArtifact

if TYPE_CHECKING:
    from multi_model_debate.config import Config
    from multi_model_debate.models.protocols import ModelBackend

console = Console()

# Valid status values for issue response checklist
VALID_CHECKLIST_STATUSES = frozenset({"ADDRESSED", "REJECTED", "DEFERRED", "NOT_APPLICABLE"})


@dataclass
class ChecklistItem:
    """An item from the issue response checklist."""

    issue_id: str
    title: str
    response: str
    status: str

    @property
    def is_valid_status(self) -> bool:
        """Check if status is a valid value."""
        return self.status.upper() in VALID_CHECKLIST_STATUSES


@dataclass
class ChecklistCoverage:
    """Coverage statistics for issue response checklist."""

    total: int
    addressed: int
    rejected: int
    deferred: int
    not_applicable: int
    invalid: int

    def summary(self) -> str:
        """Return a human-readable summary of coverage."""
        if self.total == 0:
            return "No checklist items found"
        parts = []
        if self.addressed > 0:
            parts.append(f"{self.addressed} addressed")
        if self.rejected > 0:
            parts.append(f"{self.rejected} rejected")
        if self.deferred > 0:
            parts.append(f"{self.deferred} deferred")
        if self.not_applicable > 0:
            parts.append(f"{self.not_applicable} N/A")
        if self.invalid > 0:
            parts.append(f"{self.invalid} invalid")
        return f"Issue checklist: {', '.join(parts)} (total: {self.total})"


def parse_checklist(content: str) -> list[ChecklistItem]:
    """Parse issue response checklist from Final Position content.

    Looks for markdown table rows with the expected format:
    | Issue ID | Title | Response | Status |

    Args:
        content: The Final Position markdown content.

    Returns:
        List of ChecklistItem objects parsed from the content.
    """
    items: list[ChecklistItem] = []

    # Find the checklist section
    checklist_section = re.search(
        r"##\s*(?:9\.\s*)?ISSUE RESPONSE CHECKLIST(.*?)(?=^##|\Z)",
        content,
        re.MULTILINE | re.DOTALL | re.IGNORECASE,
    )
    if not checklist_section:
        return items

    section_content = checklist_section.group(1)

    # Parse markdown table rows (skip header row and separator)
    # Match rows like: | ISSUE-001 | Title | Response | ADDRESSED |
    row_pattern = re.compile(
        r"^\s*\|\s*([^|]+?)\s*\|\s*([^|]+?)\s*\|\s*([^|]+?)\s*\|\s*([^|]+?)\s*\|?\s*$",
        re.MULTILINE,
    )

    for match in row_pattern.finditer(section_content):
        issue_id, title, response, status = (
            match.group(1).strip(),
            match.group(2).strip(),
            match.group(3).strip(),
            match.group(4).strip(),
        )

        # Skip header row and separator row
        if issue_id.lower() == "issue id" or issue_id.startswith("-"):
            continue
        if all(c in "-: " for c in issue_id):
            continue

        items.append(
            ChecklistItem(
                issue_id=issue_id,
                title=title,
                response=response,
                status=status,
            )
        )

    return items

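parse_checklist() is a pure function over the Final Position markdown, so it can be exercised in isolation. A minimal usage sketch follows; the table content is invented for illustration, and only the "| Issue ID | Title | Response | Status |" layout and the optional "9." section numbering come from the function above.

# Illustrative usage sketch; not part of the package source.
from multi_model_debate.phases.final_position import parse_checklist

sample = """
## 9. Issue Response Checklist

| Issue ID  | Title            | Response                    | Status    |
|-----------|------------------|-----------------------------|-----------|
| ISSUE-001 | Missing rollback | Added rollback step to plan | ADDRESSED |
| ISSUE-002 | Vendor lock-in   | Out of scope for this phase | DEFERRED  |
"""

items = parse_checklist(sample)
# The header and separator rows are skipped; the section heading regex tolerates
# the optional "9." prefix and is case-insensitive.
print([(item.issue_id, item.status) for item in items])
# -> [('ISSUE-001', 'ADDRESSED'), ('ISSUE-002', 'DEFERRED')]
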

def calculate_coverage(items: list[ChecklistItem]) -> ChecklistCoverage:
    """Calculate coverage statistics from checklist items.

    Args:
        items: List of ChecklistItem objects.

    Returns:
        ChecklistCoverage with counts for each status.
    """
    addressed = 0
    rejected = 0
    deferred = 0
    not_applicable = 0
    invalid = 0

    for item in items:
        status_upper = item.status.upper()
        if status_upper == "ADDRESSED":
            addressed += 1
        elif status_upper == "REJECTED":
            rejected += 1
        elif status_upper == "DEFERRED":
            deferred += 1
        elif status_upper == "NOT_APPLICABLE":
            not_applicable += 1
        else:
            invalid += 1

    return ChecklistCoverage(
        total=len(items),
        addressed=addressed,
        rejected=rejected,
        deferred=deferred,
        not_applicable=not_applicable,
        invalid=invalid,
    )

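Continuing the sketch, calculate_coverage() only inspects each item's status, upper-casing it before comparison, and anything outside VALID_CHECKLIST_STATUSES is counted as invalid. The items below are again invented for illustration.

# Illustrative usage sketch; not part of the package source.
from multi_model_debate.phases.final_position import ChecklistItem, calculate_coverage

items = [
    ChecklistItem("ISSUE-001", "Missing rollback", "Added rollback step", "ADDRESSED"),
    ChecklistItem("ISSUE-002", "Naming nitpick", "Disagree; current name is fine", "rejected"),
    ChecklistItem("ISSUE-003", "Latency regression", "Pending benchmarks", "TBD"),
]

coverage = calculate_coverage(items)
# "rejected" is counted case-insensitively; "TBD" is not a recognized status, so it
# lands in the invalid bucket that _validate_checklist() later warns about.
print(coverage.summary())
# -> Issue checklist: 1 addressed, 1 rejected, 1 invalid (total: 3)
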

class FinalPositionPhase(Phase):
    """Phase 6: Strategist generates the Final Position.

    Fully automated phase where Strategist produces a structured summary with
    recommendations for a non-technical human arbiter to make final decisions.

    DESIGN: Fully automated via CLI calls. Human is notified ONLY at the end
    when the Final Position is ready for review.
    """

    def __init__(
        self,
        run_dir: Path,
        config: Config,
        strategist: ModelBackend,
    ) -> None:
        """Initialize the final position phase.

        Args:
            run_dir: Directory for this run's artifacts.
            config: Configuration settings.
            strategist: Strategist model backend (uses CLI invocation).
        """
        super().__init__(run_dir, config)
        self.strategist = strategist
        self._defense_rounds = config.debate.strategist_rounds

    @property
    def name(self) -> str:
        """Phase identifier."""
        return "PHASE_6"

    @property
    def display_name(self) -> str:
        """Human-readable phase name."""
        return "Final Position"

    def required_artifacts(self) -> list[PhaseArtifact]:
        """Artifacts required for phase completion."""
        return [self.artifact("p6_final_position")]

    def run(self) -> None:
        """Execute the final position phase.

        Strategist generates a comprehensive Final Position with full debate context.
        Fully automated via CLI invocation. Validates issue response checklist.
        """
        final_position_artifact = self.artifact("p6_final_position")

        if not final_position_artifact.is_valid():
            console.print(" [bold cyan]Generating Final Position...[/bold cyan]")

            # Gather all context
            game_plan = self.get_game_plan()
            winner = self._get_winner()
            judge_decision = self.artifact("p3_winner_decision").read()
            peer_review = self.artifact("p4_peer_review").read()
            final_winner = self.artifact(f"p5_r{self._defense_rounds}_winner").read()
            final_strategist = self.artifact(f"p5_r{self._defense_rounds}_strategist").read()

            arbiter_template = self.render_template("arbiter_summary.md.j2")

            prompt = self.render_template(
                "arbiter_prompt.md.j2",
                arbiter_template=arbiter_template,
                game_plan=game_plan,
                winner=winner,
                judge_decision=judge_decision,
                peer_review=peer_review,
                final_winner=final_winner,
                final_strategist=final_strategist,
            )

            # Automated CLI invocation - no more file-based handoff
            response = self.strategist.generate(prompt)  # Uses per-model timeout
            final_position_artifact.write(response)
            # Journal the Strategist response for audit trail
            self.journal_response(round_num=0, response=response)
            console.print(" [green]Final Position complete[/green]")

            # Validate issue response checklist (warn only, don't fail)
            self._validate_checklist(response)
        else:
            console.print(" [dim]Final Position (cached)[/dim]")
            # Also validate checklist on cached content
            self._validate_checklist(final_position_artifact.read())

    def _validate_checklist(self, content: str) -> None:
        """Validate the issue response checklist in Final Position.

        Warns if checklist is missing or incomplete. Does not fail.

        Args:
            content: The Final Position content.
        """
        items = parse_checklist(content)

        if not items:
            console.print(
                " [yellow]Warning: No issue response checklist found in Final Position.[/yellow]"
            )
            console.print(" [dim]The checklist helps verify all critiques were addressed.[/dim]")
            return

        coverage = calculate_coverage(items)
        console.print(f" [dim]{coverage.summary()}[/dim]")

        if coverage.invalid > 0:
            console.print(
                f" [yellow]Warning: {coverage.invalid} checklist item(s) "
                "have invalid status values.[/yellow]"
            )
            console.print(
                " [dim]Valid statuses: ADDRESSED, REJECTED, DEFERRED, NOT_APPLICABLE[/dim]"
            )

    def _get_winner(self) -> str:
        """Get the winner from Phase 3."""
        winner_path = self.run_dir / "p3_winner.txt"
        if not winner_path.exists():
            raise PhaseError("Winner file not found")
        content = winner_path.read_text().strip()
        if content.startswith("WINNER="):
            return content.split("=")[1].strip()
        raise PhaseError(f"Invalid winner file: {content}")

    def get_final_position(self) -> str:
        """Get the Final Position content.

        Returns:
            The Final Position text.
        """
        return self.artifact("p6_final_position").read()

    def display_final_position(self) -> None:
        """Display the Final Position to the console."""
        final_position = self.get_final_position()
        console.print()
        console.print("=" * 70, style="bold green")
        console.print(" ADVERSARIAL REVIEW COMPLETE", style="bold white")
        console.print("=" * 70, style="bold green")
        console.print()
        console.print(final_position)

multi_model_debate/phases/judge.py
@@ -0,0 +1,177 @@
"""Phase 3: Judge determines the winner of the critic debate."""

from __future__ import annotations

import re
from pathlib import Path
from typing import TYPE_CHECKING

from rich.console import Console

from multi_model_debate.exceptions import PhaseError
from multi_model_debate.phases.base import Phase, PhaseArtifact

if TYPE_CHECKING:
    from multi_model_debate.config import Config
    from multi_model_debate.models.protocols import ModelBackend

console = Console()


class JudgePhase(Phase):
    """Phase 3: Judge determines which critic won the debate.

    Judge evaluates based on issue quality, consistency, evidence,
    and novelty - NOT rhetorical persuasiveness.

    DESIGN: Judge = Strategist's model family (isolated instance)

    The Judge evaluates CRITICS' arguments, not the plan directly.
    However, the Judge must read the plan to assess critique validity.
    Since Judge is different family from both Critics, no bias.

    See REQUIREMENTS_V2.md for full rationale and evidence.
    """

    def __init__(
        self,
        run_dir: Path,
        config: Config,
        *,
        judge: ModelBackend,
        critic_a_name: str,
        critic_b_name: str,
    ) -> None:
        """Initialize the judge phase.

        Args:
            run_dir: Directory for this run's artifacts.
            config: Configuration settings.
            judge: Judge model backend (non-interactive).
            critic_a_name: Display name for first critic (e.g., "codex").
            critic_b_name: Display name for second critic (e.g., "gemini").
        """
        super().__init__(run_dir, config)
        self.judge = judge
        self.critic_a_name = critic_a_name
        self.critic_b_name = critic_b_name
        self._rounds = config.debate.critic_rounds

    @property
    def name(self) -> str:
        """Phase identifier."""
        return "PHASE_3"

    @property
    def display_name(self) -> str:
        """Human-readable phase name."""
        return "Winner Determination"

    def required_artifacts(self) -> list[PhaseArtifact]:
        """Artifacts required for phase completion."""
        return [
            self.artifact("p3_winner_decision"),
            PhaseArtifact(
                name="p3_winner",
                path=self.run_dir / "p3_winner.txt",
                min_length=3,  # Critic name (e.g., "codex", "gemini")
            ),
        ]

    def run(self) -> None:
        """Execute the judge phase.

        Judge evaluates final critic positions and determines the winner.
        """
        decision_artifact = self.artifact("p3_winner_decision")
        winner_artifact = PhaseArtifact(
            name="p3_winner",
            path=self.run_dir / "p3_winner.txt",
            min_length=3,
        )

        if not decision_artifact.is_valid():
            console.print(" [cyan]Judge evaluating debate...[/cyan]")

            game_plan = self.get_game_plan()
            judge_template = self.render_template("judge.md.j2")

            # Get final positions from debate
            critic_a_final = self.artifact(
                f"p2_r{self._rounds}_{self.critic_a_name}", is_json=True
            ).read()
            critic_b_final = self.artifact(
                f"p2_r{self._rounds}_{self.critic_b_name}", is_json=True
            ).read()

            prompt = self.render_template(
                "judge_prompt.md.j2",
                judge_template=judge_template,
                game_plan=game_plan,
                critic_a_name=self.critic_a_name,
                critic_b_name=self.critic_b_name,
                critic_a_final=critic_a_final,
                critic_b_final=critic_b_final,
            )

            response = self.judge.generate(prompt)  # Uses per-model timeout from config
            decision_artifact.write(response)

            # Extract winner
            winner = self._extract_winner(response)
            winner_artifact.path.write_text(f"WINNER={winner}\n")

            console.print(f" [green]Winner: {winner}[/green]")
        else:
            winner = self.get_winner()
            console.print(f" [dim]Judge decision (cached) - Winner: {winner}[/dim]")

    def _extract_winner(self, decision: str) -> str:
        """Extract the winner from the judge's decision.

        Args:
            decision: The full judge decision text.

        Returns:
            The winning critic name (e.g., "codex" or "gemini").

        Raises:
            PhaseError: If winner cannot be determined.
        """
        # Build pattern from actual critic names
        critic_names = f"{self.critic_a_name}|{self.critic_b_name}"
        pattern = rf"(?:winner|winning)[^a-z]*({critic_names})"
        match = re.search(pattern, decision, re.IGNORECASE)

        if not match:
            raise PhaseError(
                f"Could not determine winner from judge output. "
                f"Expected one of: {self.critic_a_name}, {self.critic_b_name}. "
                f"Please review: {self.run_dir / 'p3_winner_decision.md'}"
            )

        winner = match.group(1).lower()

        if winner not in (self.critic_a_name, self.critic_b_name):
            raise PhaseError(f"Invalid winner: {winner}")

        return winner

    def get_winner(self) -> str:
        """Get the winner from the winner file.

        Returns:
            The winning critic name (e.g., "codex" or "gemini").

        Raises:
            PhaseError: If winner file doesn't exist or is invalid.
        """
        winner_path = self.run_dir / "p3_winner.txt"
        if not winner_path.exists():
            raise PhaseError("Winner file not found")

        content = winner_path.read_text().strip()
        if content.startswith("WINNER="):
            return content.split("=")[1].strip()

        raise PhaseError(f"Invalid winner file format: {content}")

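The winner extraction above is a plain regular-expression scan over the Judge's free-form decision text, so its tolerance is easy to check with the standard library alone. A small sketch using the same pattern, with "codex" and "gemini" standing in for the critic names (they are only the example names from the docstrings):

# Illustrative sketch of the _extract_winner() pattern; not part of the package source.
import re

critic_names = "codex|gemini"
pattern = rf"(?:winner|winning)[^a-z]*({critic_names})"

for decision in ("WINNER: codex", "Winner = GEMINI", "The winning critic is codex."):
    match = re.search(pattern, decision, re.IGNORECASE)
    print(match.group(1).lower() if match else "no match")
# -> codex
# -> gemini
# -> no match (letters are not allowed between the keyword and the name,
#    so _extract_winner() would raise PhaseError for this wording)
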
multi_model_debate/phases/synthesis.py
@@ -0,0 +1,162 @@
"""Phase 4: Winner produces Peer Review for Strategist to defend against."""

from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING

from rich.console import Console

from multi_model_debate.exceptions import PhaseError
from multi_model_debate.phases.base import Phase, PhaseArtifact

if TYPE_CHECKING:
    from multi_model_debate.config import Config
    from multi_model_debate.models.protocols import ModelBackend

console = Console()


class PeerReviewPhase(Phase):
    """Phase 4: The debate winner produces the Peer Review.

    The winner consolidates their critiques and adopts valid points
    from the loser, producing a structured Peer Review for the Strategist to defend against.
    """

    def __init__(
        self,
        run_dir: Path,
        config: Config,
        *,
        critic_a: ModelBackend,
        critic_b: ModelBackend,
        critic_a_name: str,
        critic_b_name: str,
    ) -> None:
        """Initialize the peer review phase.

        Args:
            run_dir: Directory for this run's artifacts.
            config: Configuration settings.
            critic_a: First critic model backend.
            critic_b: Second critic model backend.
            critic_a_name: Display name for first critic (e.g., "codex").
            critic_b_name: Display name for second critic (e.g., "gemini").
        """
        super().__init__(run_dir, config)
        self.critic_a = critic_a
        self.critic_b = critic_b
        self.critic_a_name = critic_a_name
        self.critic_b_name = critic_b_name
        self._rounds = config.debate.critic_rounds

    @property
    def name(self) -> str:
        """Phase identifier."""
        return "PHASE_4"

    @property
    def display_name(self) -> str:
        """Human-readable phase name."""
        return "Peer Review"

    def required_artifacts(self) -> list[PhaseArtifact]:
        """Artifacts required for phase completion."""
        return [self.artifact("p4_peer_review")]

    def run(self) -> None:
        """Execute the peer review phase.

        The winner produces a comprehensive Peer Review for the Strategist to defend against.
        """
        peer_review_artifact = self.artifact("p4_peer_review")

        if not peer_review_artifact.is_valid():
            winner_name = self._get_winner_name()
            console.print(f" [cyan]{winner_name} generating peer review...[/cyan]")

            game_plan = self.get_game_plan()
            synthesis_template = self.render_template("synthesis_template.md.j2")

            # Get final positions from debate
            critic_a_final = self.artifact(
                f"p2_r{self._rounds}_{self.critic_a_name}", is_json=True
            ).read()
            critic_b_final = self.artifact(
                f"p2_r{self._rounds}_{self.critic_b_name}", is_json=True
            ).read()

            # Determine winner/loser based on winner name
            if winner_name == self.critic_a_name:
                winner_final = critic_a_final
                loser_final = critic_b_final
                winner_model = self.critic_a
            else:
                winner_final = critic_b_final
                loser_final = critic_a_final
                winner_model = self.critic_b

            prompt = self.render_template(
                "synthesis_prompt.md.j2",
                synthesis_template=synthesis_template,
                game_plan=game_plan,
                winner_final=winner_final,
                loser_final=loser_final,
            )

            response = winner_model.generate(prompt)  # Uses per-model timeout
            peer_review_artifact.write(response)
            console.print(" [green]Peer review complete[/green]")
        else:
            console.print(" [dim]Peer review (cached)[/dim]")

    def _get_winner_name(self) -> str:
        """Get the winner's model name from Phase 3.

        Returns:
            The critic name (e.g., "codex" or "gemini").

        Raises:
            PhaseError: If winner file doesn't exist or format is invalid.
        """
        winner_path = self.run_dir / "p3_winner.txt"
        if not winner_path.exists():
            raise PhaseError("Winner file not found - Phase 3 must complete first")

        content = winner_path.read_text().strip()
        if content.startswith("WINNER="):
            return content.split("=")[1].strip()

        raise PhaseError(f"Invalid winner file format: {content}")

    def get_peer_review(self) -> str:
        """Get the peer review content.

        Returns:
            The peer review text.
        """
        return self.artifact("p4_peer_review").read()

    def get_winner_model(self) -> ModelBackend:
        """Get the winning model backend.

        Returns:
            The winning critic backend.
        """
        winner_name = self._get_winner_name()
        return self.critic_a if winner_name == self.critic_a_name else self.critic_b

    def get_winner_lens(self) -> str:
        """Get the winner's lens prompt.

        Returns:
            The lens template content for the winning critic.
        """
        winner_name = self._get_winner_name()
        # Use critic A's lens for critic A, critic B's lens for critic B
        if winner_name == self.critic_a_name:
            template = "critic_1_lens.md.j2"
        else:
            template = "critic_2_lens.md.j2"
        return self.render_template(template)

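Phases 3, 4, and 6 all hand the debate winner around through the same small file, p3_winner.txt, whose only accepted format is a single WINNER=<name> line. A minimal sketch of that round trip (the run directory and critic name are illustrative):

# Illustrative sketch of the p3_winner.txt hand-off; not part of the package source.
from pathlib import Path

run_dir = Path("runs/example")  # hypothetical run directory
run_dir.mkdir(parents=True, exist_ok=True)

# JudgePhase.run() persists its decision like this:
(run_dir / "p3_winner.txt").write_text("WINNER=codex\n")

# PeerReviewPhase._get_winner_name() and FinalPositionPhase._get_winner() read it back
# the same way, raising PhaseError when the prefix is missing:
content = (run_dir / "p3_winner.txt").read_text().strip()
winner = content.split("=")[1].strip() if content.startswith("WINNER=") else None
print(winner)
# -> codex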