multi-model-debate 1.0.1__py3-none-any.whl

Files changed (44)
  1. multi_model_debate/__init__.py +4 -0
  2. multi_model_debate/__main__.py +6 -0
  3. multi_model_debate/cli.py +290 -0
  4. multi_model_debate/config.py +271 -0
  5. multi_model_debate/exceptions.py +83 -0
  6. multi_model_debate/models/__init__.py +71 -0
  7. multi_model_debate/models/claude.py +168 -0
  8. multi_model_debate/models/cli_wrapper.py +233 -0
  9. multi_model_debate/models/gemini.py +66 -0
  10. multi_model_debate/models/openai.py +66 -0
  11. multi_model_debate/models/protocols.py +35 -0
  12. multi_model_debate/orchestrator.py +465 -0
  13. multi_model_debate/phases/__init__.py +22 -0
  14. multi_model_debate/phases/base.py +236 -0
  15. multi_model_debate/phases/baseline.py +117 -0
  16. multi_model_debate/phases/debate.py +154 -0
  17. multi_model_debate/phases/defense.py +186 -0
  18. multi_model_debate/phases/final_position.py +307 -0
  19. multi_model_debate/phases/judge.py +177 -0
  20. multi_model_debate/phases/synthesis.py +162 -0
  21. multi_model_debate/pre_debate.py +83 -0
  22. multi_model_debate/prompts/arbiter_prompt.md.j2 +24 -0
  23. multi_model_debate/prompts/arbiter_summary.md.j2 +102 -0
  24. multi_model_debate/prompts/baseline_critique.md.j2 +5 -0
  25. multi_model_debate/prompts/critic_1_lens.md.j2 +52 -0
  26. multi_model_debate/prompts/critic_2_lens.md.j2 +52 -0
  27. multi_model_debate/prompts/debate_round.md.j2 +14 -0
  28. multi_model_debate/prompts/defense_initial.md.j2 +9 -0
  29. multi_model_debate/prompts/defense_round.md.j2 +8 -0
  30. multi_model_debate/prompts/judge.md.j2 +34 -0
  31. multi_model_debate/prompts/judge_prompt.md.j2 +13 -0
  32. multi_model_debate/prompts/strategist_proxy_lens.md.j2 +33 -0
  33. multi_model_debate/prompts/synthesis_prompt.md.j2 +16 -0
  34. multi_model_debate/prompts/synthesis_template.md.j2 +44 -0
  35. multi_model_debate/prompts/winner_response.md.j2 +17 -0
  36. multi_model_debate/response_parser.py +268 -0
  37. multi_model_debate/roles.py +163 -0
  38. multi_model_debate/storage/__init__.py +17 -0
  39. multi_model_debate/storage/run.py +509 -0
  40. multi_model_debate-1.0.1.dist-info/METADATA +572 -0
  41. multi_model_debate-1.0.1.dist-info/RECORD +44 -0
  42. multi_model_debate-1.0.1.dist-info/WHEEL +4 -0
  43. multi_model_debate-1.0.1.dist-info/entry_points.txt +2 -0
  44. multi_model_debate-1.0.1.dist-info/licenses/LICENSE +21 -0

multi_model_debate/phases/base.py
@@ -0,0 +1,236 @@
+"""Base classes for review phases."""
+
+from __future__ import annotations
+
+import json
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from datetime import datetime
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from jinja2 import Environment, FileSystemLoader, StrictUndefined
+
+from multi_model_debate.response_parser import extract_json_block, is_valid_response
+
+if TYPE_CHECKING:
+    from multi_model_debate.config import Config
+
+
+@dataclass
+class PhaseArtifact:
+    """Represents a file artifact produced by a phase.
+
+    Used for checkpoint validation - a phase is complete when all
+    its required artifacts exist and are valid.
+
+    Attributes:
+        name: Artifact identifier.
+        path: Path for the artifact file.
+        min_length: Minimum content length for non-JSON responses.
+        is_json: If True, save JSON cleanly without fences and use .json extension.
+    """
+
+    name: str
+    path: Path
+    min_length: int = 100
+    is_json: bool = False
+
+    def exists(self) -> bool:
+        """Check if the artifact file exists."""
+        return self.path.exists()
+
+    def is_valid(self) -> bool:
+        """Check if artifact exists and meets validity criteria.
+
+        Uses JSON-aware validation that accepts:
+        - Valid JSON responses (any length)
+        - "NO NEW ISSUES" format for convergence
+        - Non-JSON responses meeting minimum length
+
+        Returns:
+            True if artifact exists and is valid.
+        """
+        if not self.path.exists():
+            return False
+        content = self.path.read_text()
+        return is_valid_response(content, self.min_length)
+
+    def read(self) -> str:
+        """Read the artifact content.
+
+        Returns:
+            The artifact file content.
+
+        Raises:
+            FileNotFoundError: If artifact doesn't exist.
+        """
+        if not self.path.exists():
+            raise FileNotFoundError(f"Artifact not found: {self.path}")
+        return self.path.read_text()
+
+    def write(self, content: str) -> None:
+        """Write content to the artifact file.
+
+        If is_json=True, extracts clean JSON from markdown fences
+        before saving. This ensures artifacts are stored as valid
+        JSON files without markdown wrapper.
+
+        Args:
+            content: The content to write.
+        """
+        if self.is_json:
+            # Try to extract clean JSON from markdown fences
+            json_content = extract_json_block(content)
+            if json_content is not None:
+                self.path.write_text(json_content)
+                return
+
+        # Write content as-is (either not JSON or no fences to strip)
+        self.path.write_text(content)
+
+
+class Phase(ABC):
+    """Base class for review phases.
+
+    Each phase represents a stage in the adversarial review workflow.
+    Phases produce artifacts (files) that can be validated for checkpoint/resume.
+    """
+
+    def __init__(self, run_dir: Path, config: Config) -> None:
+        """Initialize the phase.
+
+        Args:
+            run_dir: Directory for this run's artifacts.
+            config: Configuration settings.
+        """
+        self.run_dir = run_dir
+        self.config = config
+        self._template_env: Environment | None = None
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Phase identifier for checkpointing (e.g., 'PHASE_1')."""
+        ...
+
+    @property
+    @abstractmethod
+    def display_name(self) -> str:
+        """Human-readable phase name for logging."""
+        ...
+
+    @abstractmethod
+    def required_artifacts(self) -> list[PhaseArtifact]:
+        """Artifacts that must exist for phase to be complete.
+
+        Returns:
+            List of PhaseArtifact instances.
+        """
+        ...
+
+    @abstractmethod
+    def run(self) -> None:
+        """Execute the phase.
+
+        May be partial if resuming - implementations should check
+        if individual artifacts already exist before generating them.
+        """
+        ...
+
+    def is_complete(self) -> bool:
+        """Check if all required artifacts are valid.
+
+        Returns:
+            True if phase is complete.
+        """
+        return all(artifact.is_valid() for artifact in self.required_artifacts())
+
+    @property
+    def template_env(self) -> Environment:
+        """Get the Jinja2 template environment.
+
+        Lazily creates the environment on first access.
+        """
+        if self._template_env is None:
+            prompts_dir = Path(__file__).parent.parent / "prompts"
+            self._template_env = Environment(
+                loader=FileSystemLoader(prompts_dir),
+                undefined=StrictUndefined,
+                trim_blocks=True,
+                lstrip_blocks=True,
+            )
+        return self._template_env
+
+    def render_template(self, template_name: str, **kwargs: object) -> str:
+        """Render a Jinja2 template.
+
+        Args:
+            template_name: Name of template file (e.g., 'critic_1_lens.md.j2').
+            **kwargs: Template variables.
+
+        Returns:
+            Rendered template string.
+        """
+        template = self.template_env.get_template(template_name)
+        return template.render(**kwargs)
+
+    def get_game_plan(self) -> str:
+        """Read the game plan from the run directory.
+
+        Returns:
+            The game plan content.
+        """
+        return (self.run_dir / "00_game_plan.md").read_text()
+
+    def artifact(
+        self,
+        name: str,
+        filename: str | None = None,
+        *,
+        is_json: bool = False,
+    ) -> PhaseArtifact:
+        """Create a PhaseArtifact for this phase.
+
+        Args:
+            name: Artifact name.
+            filename: File name. If is_json=True, defaults to name + '.json',
+                otherwise defaults to name + '.md'.
+            is_json: If True, save as clean JSON with .json extension.
+
+        Returns:
+            PhaseArtifact instance.
+        """
+        if filename is None:
+            extension = ".json" if is_json else ".md"
+            filename = f"{name}{extension}"
+
+        return PhaseArtifact(
+            name=name,
+            path=self.run_dir / filename,
+            min_length=self.config.models.min_response_length,
+            is_json=is_json,
+        )
+
+    def journal_response(self, round_num: int, response: str) -> None:
+        """Journal a Strategist response for audit trail.
+
+        Appends entry to strategist_journal.jsonl in JSONL format.
+        Used by DefensePhase and FinalPositionPhase to record Strategist outputs.
+
+        See REQUIREMENTS_V2.md Section 5 for journaling rationale.
+
+        Args:
+            round_num: Round number within the phase (0 for initial/only).
+            response: The Strategist's response text.
+        """
+        journal_path = self.run_dir / "strategist_journal.jsonl"
+        entry = {
+            "timestamp": datetime.now().isoformat(),
+            "phase": self.name,
+            "round": round_num,
+            "response_length": len(response),
+            "response": response,
+        }
+        with journal_path.open("a") as f:
+            f.write(json.dumps(entry) + "\n")
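
Taken together, PhaseArtifact and Phase define the checkpoint/resume contract: a phase is done exactly when every artifact returned by required_artifacts() passes is_valid(), so run() can be re-entered after a crash and skip work that already exists on disk. Below is a minimal sketch of a concrete subclass; the class name, artifact id, and payload are invented purely for illustration and are not part of the package.

# Hypothetical subclass sketch; "ExamplePhase" and "px_example" are invented names.
from multi_model_debate.phases.base import Phase, PhaseArtifact


class ExamplePhase(Phase):
    @property
    def name(self) -> str:
        return "PHASE_X"  # checkpoint identifier

    @property
    def display_name(self) -> str:
        return "Example Phase"

    def required_artifacts(self) -> list[PhaseArtifact]:
        # Complete once px_example.json exists and passes is_valid().
        return [self.artifact("px_example", is_json=True)]

    def run(self) -> None:
        artifact = self.artifact("px_example", is_json=True)
        if artifact.is_valid():
            return  # resuming: this artifact was produced by an earlier run
        # A real phase would render a prompt via self.render_template(...) and
        # call a model backend; a placeholder payload stands in for that here.
        # write() strips markdown fences when is_json=True.
        artifact.write('{"issues": []}')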

multi_model_debate/phases/baseline.py
@@ -0,0 +1,117 @@
+"""Phase 1: Independent baseline critiques from critics."""
+
+from __future__ import annotations
+
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from rich.console import Console
+
+from multi_model_debate.phases.base import Phase, PhaseArtifact
+
+if TYPE_CHECKING:
+    from multi_model_debate.config import Config
+    from multi_model_debate.models.protocols import ModelBackend
+
+console = Console()
+
+
+class BaselinePhase(Phase):
+    """Phase 1: Critics independently critique the proposal.
+
+    Each critic uses an adversarial "assume flawed" persona with different
+    focus areas. Critics do NOT see each other's critiques.
+    """
+
+    def __init__(
+        self,
+        run_dir: Path,
+        config: Config,
+        *,
+        critic_a: ModelBackend,
+        critic_b: ModelBackend,
+        critic_a_name: str,
+        critic_b_name: str,
+    ) -> None:
+        """Initialize the baseline phase.
+
+        Args:
+            run_dir: Directory for this run's artifacts.
+            config: Configuration settings.
+            critic_a: First critic model backend.
+            critic_b: Second critic model backend.
+            critic_a_name: Display name for first critic (e.g., "codex").
+            critic_b_name: Display name for second critic (e.g., "gemini").
+        """
+        super().__init__(run_dir, config)
+        self.critic_a = critic_a
+        self.critic_b = critic_b
+        self.critic_a_name = critic_a_name
+        self.critic_b_name = critic_b_name
+
+    @property
+    def name(self) -> str:
+        """Phase identifier."""
+        return "PHASE_1"
+
+    @property
+    def display_name(self) -> str:
+        """Human-readable phase name."""
+        return "Baseline Critiques"
+
+    def required_artifacts(self) -> list[PhaseArtifact]:
+        """Artifacts required for phase completion."""
+        return [
+            self.artifact(f"p1_{self.critic_a_name}_baseline", is_json=True),
+            self.artifact(f"p1_{self.critic_b_name}_baseline", is_json=True),
+        ]
+
+    def run(self) -> None:
+        """Execute the baseline phase.
+
+        Generates independent critiques from both critics in parallel.
+        Skips already-completed artifacts on resume.
+        """
+        game_plan = self.get_game_plan()
+        # Dynamic lens selection based on model
+        critic_a_lens = self.render_template("critic_1_lens.md.j2")
+        critic_b_lens = self.render_template("critic_2_lens.md.j2")
+
+        critic_a_artifact = self.artifact(f"p1_{self.critic_a_name}_baseline", is_json=True)
+        critic_b_artifact = self.artifact(f"p1_{self.critic_b_name}_baseline", is_json=True)
+
+        # Track which critics need to run
+        futures: dict[Any, tuple[str, PhaseArtifact]] = {}
+
+        with ThreadPoolExecutor(max_workers=2) as executor:
+            if not critic_a_artifact.is_valid():
+                console.print(f" [cyan]{self.critic_a_name} baseline critique...[/cyan]")
+                critic_a_prompt = self.render_template(
+                    "baseline_critique.md.j2",
+                    lens_prompt=critic_a_lens,
+                    game_plan=game_plan,
+                )
+                future = executor.submit(self.critic_a.generate, critic_a_prompt)
+                futures[future] = (self.critic_a_name, critic_a_artifact)
+            else:
+                console.print(f" [dim]{self.critic_a_name} baseline (cached)[/dim]")
+
+            if not critic_b_artifact.is_valid():
+                console.print(f" [cyan]{self.critic_b_name} baseline critique...[/cyan]")
+                critic_b_prompt = self.render_template(
+                    "baseline_critique.md.j2",
+                    lens_prompt=critic_b_lens,
+                    game_plan=game_plan,
+                )
+                future = executor.submit(self.critic_b.generate, critic_b_prompt)
+                futures[future] = (self.critic_b_name, critic_b_artifact)
+            else:
+                console.print(f" [dim]{self.critic_b_name} baseline (cached)[/dim]")
+
+            # Wait for parallel calls to complete
+            for future in as_completed(futures):
+                name, artifact = futures[future]
+                response: str = future.result()
+                artifact.write(response)
+                console.print(f" [green]{name} baseline complete[/green]")
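
run() submits only the critics whose artifacts fail is_valid(), and the futures dict maps each future back to its (name, artifact) pair so results can be written as they finish. The same idiom stripped of the model calls, as a stand-alone sketch (the worker callables and names are invented, not part of the package):

# Stand-alone sketch of the submit-only-missing pattern used above.
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Callable


def run_workers(workers: dict[str, Callable[[], str]]) -> dict[str, str]:
    """Run the given workers in parallel and collect results keyed by name.

    The caller is expected to pass only the workers whose cached artifact
    failed validation, mirroring BaselinePhase.run().
    """
    results: dict[str, str] = {}
    futures = {}
    with ThreadPoolExecutor(max_workers=2) as executor:
        for name, worker in workers.items():
            futures[executor.submit(worker)] = name
        for future in as_completed(futures):
            results[futures[future]] = future.result()  # re-raises worker errors
    return results


if __name__ == "__main__":
    print(run_workers({
        "critic_a": lambda: "critique from critic A",
        "critic_b": lambda: "critique from critic B",
    }))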

multi_model_debate/phases/debate.py
@@ -0,0 +1,154 @@
+"""Phase 2: Critic vs Critic adversarial debate."""
+
+from __future__ import annotations
+
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from rich.console import Console
+
+from multi_model_debate.phases.base import Phase, PhaseArtifact
+
+if TYPE_CHECKING:
+    from multi_model_debate.config import Config
+    from multi_model_debate.models.protocols import ModelBackend
+
+console = Console()
+
+
+class DebatePhase(Phase):
+    """Phase 2: Critics debate each other.
+
+    Multiple rounds of alternating critiques. Each round only sees
+    the previous round's response (rolling window) to prevent token saturation.
+    """
+
+    def __init__(
+        self,
+        run_dir: Path,
+        config: Config,
+        *,
+        critic_a: ModelBackend,
+        critic_b: ModelBackend,
+        critic_a_name: str,
+        critic_b_name: str,
+    ) -> None:
+        """Initialize the debate phase.
+
+        Args:
+            run_dir: Directory for this run's artifacts.
+            config: Configuration settings.
+            critic_a: First critic model backend.
+            critic_b: Second critic model backend.
+            critic_a_name: Display name for first critic (e.g., "codex").
+            critic_b_name: Display name for second critic (e.g., "gemini").
+        """
+        super().__init__(run_dir, config)
+        self.critic_a = critic_a
+        self.critic_b = critic_b
+        self.critic_a_name = critic_a_name
+        self.critic_b_name = critic_b_name
+        self._rounds = config.debate.critic_rounds
+
+    @property
+    def name(self) -> str:
+        """Phase identifier."""
+        return "PHASE_2"
+
+    @property
+    def display_name(self) -> str:
+        """Human-readable phase name with dynamic model names."""
+        return f"{self.critic_a_name} vs {self.critic_b_name} Debate"
+
+    def required_artifacts(self) -> list[PhaseArtifact]:
+        """Artifacts required for phase completion."""
+        artifacts = []
+        for r in range(1, self._rounds + 1):
+            artifacts.append(self.artifact(f"p2_r{r}_{self.critic_a_name}", is_json=True))
+            artifacts.append(self.artifact(f"p2_r{r}_{self.critic_b_name}", is_json=True))
+        return artifacts
+
+    def run(self) -> None:
+        """Execute the debate phase.
+
+        Runs multiple rounds where both critics respond in parallel.
+        Each critic responds to the other's PREVIOUS round output.
+        Uses rolling window context (only previous round's response).
+        """
+        game_plan = self.get_game_plan()
+        # Dynamic lens selection based on model
+        critic_a_lens = self.render_template("critic_1_lens.md.j2")
+        critic_b_lens = self.render_template("critic_2_lens.md.j2")
+
+        # Load baselines as starting point
+        critic_a_last = self.artifact(f"p1_{self.critic_a_name}_baseline", is_json=True).read()
+        critic_b_last = self.artifact(f"p1_{self.critic_b_name}_baseline", is_json=True).read()
+
+        for round_num in range(1, self._rounds + 1):
+            console.print(f" [bold]Round {round_num}/{self._rounds}[/bold]")
+
+            critic_a_artifact = self.artifact(f"p2_r{round_num}_{self.critic_a_name}", is_json=True)
+            critic_b_artifact = self.artifact(f"p2_r{round_num}_{self.critic_b_name}", is_json=True)
+
+            # Track which critics need to run
+            futures: dict[Any, tuple[str, PhaseArtifact]] = {}
+            round_label = "Baseline" if round_num == 1 else f"Round {round_num - 1}"
+
+            with ThreadPoolExecutor(max_workers=2) as executor:
+                # Critic A responds to Critic B's previous output
+                if not critic_a_artifact.is_valid():
+                    console.print(f" [cyan]{self.critic_a_name} responding...[/cyan]")
+                    critic_a_prompt = self.render_template(
+                        "debate_round.md.j2",
+                        lens_prompt=critic_a_lens,
+                        game_plan=game_plan,
+                        opponent_name=self.critic_b_name.upper(),
+                        round_label=round_label,
+                        opponent_response=critic_b_last,
+                    )
+                    future = executor.submit(self.critic_a.generate, critic_a_prompt)
+                    futures[future] = (self.critic_a_name, critic_a_artifact)
+                else:
+                    console.print(f" [dim]{self.critic_a_name} (cached)[/dim]")
+
+                # Critic B responds to Critic A's previous output
+                if not critic_b_artifact.is_valid():
+                    console.print(f" [cyan]{self.critic_b_name} responding...[/cyan]")
+                    critic_b_prompt = self.render_template(
+                        "debate_round.md.j2",
+                        lens_prompt=critic_b_lens,
+                        game_plan=game_plan,
+                        opponent_name=self.critic_a_name.upper(),
+                        round_label=round_label,
+                        opponent_response=critic_a_last,
+                    )
+                    future = executor.submit(self.critic_b.generate, critic_b_prompt)
+                    futures[future] = (self.critic_b_name, critic_b_artifact)
+                else:
+                    console.print(f" [dim]{self.critic_b_name} (cached)[/dim]")
+
+                # Wait for parallel calls to complete
+                for future in as_completed(futures):
+                    name, artifact = futures[future]
+                    response: str = future.result()
+                    artifact.write(response)
+                    console.print(f" [green]{name} done[/green]")
+
+            # Update "last" responses for next round
+            critic_a_last = critic_a_artifact.read()
+            critic_b_last = critic_b_artifact.read()
+
+    def get_final_positions(self) -> tuple[str, str]:
+        """Get the final positions from the debate.
+
+        Returns:
+            Tuple of (critic_a_final, critic_b_final) responses.
+        """
+        critic_a_final = self.artifact(
+            f"p2_r{self._rounds}_{self.critic_a_name}", is_json=True
+        ).read()
+        critic_b_final = self.artifact(
+            f"p2_r{self._rounds}_{self.critic_b_name}", is_json=True
+        ).read()
+        return critic_a_final, critic_b_final
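
Because artifacts are keyed by both round number and critic name, resume granularity is one critic per round, and get_final_positions() simply reads the last round's files. A worked example under assumed values (two debate rounds and the critic names used in the docstrings) of what required_artifacts() resolves to under run_dir:

# Assumed values for illustration: critic_rounds = 2, critics "codex" and "gemini".
rounds = 2
critics = ("codex", "gemini")
expected = [f"p2_r{r}_{name}.json" for r in range(1, rounds + 1) for name in critics]
print(expected)
# ['p2_r1_codex.json', 'p2_r1_gemini.json', 'p2_r2_codex.json', 'p2_r2_gemini.json']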

multi_model_debate/phases/defense.py
@@ -0,0 +1,186 @@
+"""Phase 5: Strategist defends the proposal against the winner's Peer Review."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from rich.console import Console
+
+from multi_model_debate.exceptions import PhaseError
+from multi_model_debate.phases.base import Phase, PhaseArtifact
+
+if TYPE_CHECKING:
+    from multi_model_debate.config import Config
+    from multi_model_debate.models.protocols import ModelBackend
+
+console = Console()
+
+
+class DefensePhase(Phase):
+    """Phase 5: Strategist defends the proposal against the debate winner.
+
+    Fully automated phase where Strategist responds via CLI invocation.
+    The Strategist is the AI that authored the game plan and has full context.
+    """
+
+    def __init__(
+        self,
+        run_dir: Path,
+        config: Config,
+        strategist: ModelBackend,
+        *,
+        critic_a: ModelBackend,
+        critic_b: ModelBackend,
+        critic_a_name: str,
+        critic_b_name: str,
+    ) -> None:
+        """Initialize the defense phase.
+
+        Args:
+            run_dir: Directory for this run's artifacts.
+            config: Configuration settings.
+            strategist: Strategist model backend (uses CLI invocation).
+            critic_a: First critic model backend.
+            critic_b: Second critic model backend.
+            critic_a_name: Display name for first critic (e.g., "codex").
+            critic_b_name: Display name for second critic (e.g., "gemini").
+        """
+        super().__init__(run_dir, config)
+        self.strategist = strategist
+        self.critic_a = critic_a
+        self.critic_b = critic_b
+        self.critic_a_name = critic_a_name
+        self.critic_b_name = critic_b_name
+        self._rounds = config.debate.strategist_rounds
+
+    @property
+    def name(self) -> str:
+        """Phase identifier."""
+        return "PHASE_5"
+
+    @property
+    def display_name(self) -> str:
+        """Human-readable phase name."""
+        return "Strategist Defense"
+
+    def required_artifacts(self) -> list[PhaseArtifact]:
+        """Artifacts required for phase completion.
+
+        Phase 5 requires:
+        - p5_r0_strategist.md (initial Strategist defense)
+        - p5_r{1..N}_winner.md (winner's responses)
+        - p5_r{1..N}_strategist.md (Strategist's responses)
+        """
+        artifacts = [self.artifact("p5_r0_strategist")]
+        for r in range(1, self._rounds + 1):
+            artifacts.append(self.artifact(f"p5_r{r}_winner"))
+            artifacts.append(self.artifact(f"p5_r{r}_strategist"))
+        return artifacts
+
+    def run(self) -> None:
+        """Execute the defense phase.
+
+        Strategist defends against the winner's Peer Review, then multiple
+        rounds of back-and-forth debate. All responses are automated via CLI.
+        """
+        winner_name = self._get_winner_name()
+        winner_model = self.critic_a if winner_name == self.critic_a_name else self.critic_b
+        winner_lens = self._get_winner_lens(winner_name)
+
+        game_plan = self.get_game_plan()
+        strategist_lens = self.render_template("strategist_proxy_lens.md.j2")
+        peer_review = self.artifact("p4_peer_review").read()
+
+        # Initial Strategist defense against Peer Review
+        strategist_initial = self.artifact("p5_r0_strategist")
+        if not strategist_initial.is_valid():
+            console.print(" [bold cyan]Round 0: Strategist Initial Defense[/bold cyan]")
+            prompt = self.render_template(
+                "defense_initial.md.j2",
+                strategist_lens=strategist_lens,
+                peer_review=peer_review,
+            )
+            response = self.strategist.generate(prompt)  # Uses per-model timeout
+            strategist_initial.write(response)
+            # Journal the Strategist response for audit trail
+            self.journal_response(round_num=0, response=response)
+            console.print(" [green]Strategist initial defense complete[/green]")
+        else:
+            console.print(" [dim]Round 0: Strategist Initial (cached)[/dim]")
+
+        strategist_last = strategist_initial.read()
+
+        # Debate rounds
+        for round_num in range(1, self._rounds + 1):
+            console.print(f" [bold]Round {round_num}/{self._rounds}[/bold]")
+
+            # Winner responds to Strategist
+            winner_artifact = self.artifact(f"p5_r{round_num}_winner")
+            if not winner_artifact.is_valid():
+                console.print(f" [cyan]{winner_name} responding...[/cyan]")
+                round_label = "Initial" if round_num == 1 else f"Round {round_num - 1}"
+                prompt = self.render_template(
+                    "winner_response.md.j2",
+                    winner_lens=winner_lens,
+                    game_plan=game_plan,
+                    peer_review=peer_review,
+                    round_label=round_label,
+                    strategist_response=strategist_last,
+                )
+                response = winner_model.generate(prompt)  # Uses per-model timeout
+                winner_artifact.write(response)
+                console.print(f" [green]{winner_name} done[/green]")
+            else:
+                console.print(f" [dim]{winner_name} (cached)[/dim]")
+
+            winner_last = winner_artifact.read()
+
+            # Strategist responds to winner
+            strategist_artifact = self.artifact(f"p5_r{round_num}_strategist")
+            if not strategist_artifact.is_valid():
+                console.print(" [bold cyan]Strategist responding...[/bold cyan]")
+                prompt = self.render_template(
+                    "defense_round.md.j2",
+                    strategist_lens=strategist_lens,
+                    round_number=round_num,
+                    winner_response=winner_last,
+                )
+                response = self.strategist.generate(prompt)  # Uses per-model timeout
+                strategist_artifact.write(response)
+                # Journal the Strategist response for audit trail
+                self.journal_response(round_num=round_num, response=response)
+                console.print(" [green]Strategist done[/green]")
+            else:
+                console.print(" [dim]Strategist (cached)[/dim]")
+
+            strategist_last = strategist_artifact.read()
+
+    def _get_winner_name(self) -> str:
+        """Get the winner's model name from Phase 3."""
+        winner_path = self.run_dir / "p3_winner.txt"
+        if not winner_path.exists():
+            raise PhaseError("Winner file not found")
+        content = winner_path.read_text().strip()
+        if content.startswith("WINNER="):
+            return content.split("=")[1].strip()
+        raise PhaseError(f"Invalid winner file: {content}")
+
+    def _get_winner_lens(self, winner_name: str) -> str:
+        """Get the winner's lens prompt."""
+        # Use critic A's lens for critic A, critic B's lens for critic B
+        if winner_name == self.critic_a_name:
+            template = "critic_1_lens.md.j2"
+        else:
+            template = "critic_2_lens.md.j2"
+        return self.render_template(template)
+
+    def get_final_responses(self) -> tuple[str, str]:
+        """Get the final responses from the defense.
+
+        Returns:
+            Tuple of (final_winner_response, final_strategist_response).
+        """
+        final_winner = self.artifact(f"p5_r{self._rounds}_winner").read()
+        final_strategist = self.artifact(f"p5_r{self._rounds}_strategist").read()
+        return final_winner, final_strategist
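
_get_winner_name() expects Phase 3 to have written p3_winner.txt containing a single WINNER=<critic name> line; anything else raises PhaseError. A small sketch of that contract outside the class, with the example value assumed rather than taken from a real run:

# Sketch of the winner-file contract; "codex" below is an assumed example value.
# The package itself raises PhaseError instead of ValueError.
from pathlib import Path


def read_winner(run_dir: Path) -> str:
    content = (run_dir / "p3_winner.txt").read_text().strip()
    if not content.startswith("WINNER="):
        raise ValueError(f"Invalid winner file: {content}")
    return content.split("=")[1].strip()


# A file containing the line "WINNER=codex" yields "codex".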