multi-model-debate 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. multi_model_debate/__init__.py +4 -0
  2. multi_model_debate/__main__.py +6 -0
  3. multi_model_debate/cli.py +290 -0
  4. multi_model_debate/config.py +271 -0
  5. multi_model_debate/exceptions.py +83 -0
  6. multi_model_debate/models/__init__.py +71 -0
  7. multi_model_debate/models/claude.py +168 -0
  8. multi_model_debate/models/cli_wrapper.py +233 -0
  9. multi_model_debate/models/gemini.py +66 -0
  10. multi_model_debate/models/openai.py +66 -0
  11. multi_model_debate/models/protocols.py +35 -0
  12. multi_model_debate/orchestrator.py +465 -0
  13. multi_model_debate/phases/__init__.py +22 -0
  14. multi_model_debate/phases/base.py +236 -0
  15. multi_model_debate/phases/baseline.py +117 -0
  16. multi_model_debate/phases/debate.py +154 -0
  17. multi_model_debate/phases/defense.py +186 -0
  18. multi_model_debate/phases/final_position.py +307 -0
  19. multi_model_debate/phases/judge.py +177 -0
  20. multi_model_debate/phases/synthesis.py +162 -0
  21. multi_model_debate/pre_debate.py +83 -0
  22. multi_model_debate/prompts/arbiter_prompt.md.j2 +24 -0
  23. multi_model_debate/prompts/arbiter_summary.md.j2 +102 -0
  24. multi_model_debate/prompts/baseline_critique.md.j2 +5 -0
  25. multi_model_debate/prompts/critic_1_lens.md.j2 +52 -0
  26. multi_model_debate/prompts/critic_2_lens.md.j2 +52 -0
  27. multi_model_debate/prompts/debate_round.md.j2 +14 -0
  28. multi_model_debate/prompts/defense_initial.md.j2 +9 -0
  29. multi_model_debate/prompts/defense_round.md.j2 +8 -0
  30. multi_model_debate/prompts/judge.md.j2 +34 -0
  31. multi_model_debate/prompts/judge_prompt.md.j2 +13 -0
  32. multi_model_debate/prompts/strategist_proxy_lens.md.j2 +33 -0
  33. multi_model_debate/prompts/synthesis_prompt.md.j2 +16 -0
  34. multi_model_debate/prompts/synthesis_template.md.j2 +44 -0
  35. multi_model_debate/prompts/winner_response.md.j2 +17 -0
  36. multi_model_debate/response_parser.py +268 -0
  37. multi_model_debate/roles.py +163 -0
  38. multi_model_debate/storage/__init__.py +17 -0
  39. multi_model_debate/storage/run.py +509 -0
  40. multi_model_debate-1.0.1.dist-info/METADATA +572 -0
  41. multi_model_debate-1.0.1.dist-info/RECORD +44 -0
  42. multi_model_debate-1.0.1.dist-info/WHEEL +4 -0
  43. multi_model_debate-1.0.1.dist-info/entry_points.txt +2 -0
  44. multi_model_debate-1.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,465 @@
1
+ """Main orchestration for the adversarial review workflow."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING, TypedDict
8
+
9
+ from rich.console import Console
10
+
11
+ from multi_model_debate.exceptions import ReviewError
12
+ from multi_model_debate.models import (
13
+ CLIModelBackend,
14
+ create_cli_backend,
15
+ create_strategist_backend,
16
+ )
17
+ from multi_model_debate.phases import (
18
+ BaselinePhase,
19
+ DebatePhase,
20
+ DefensePhase,
21
+ FinalPositionPhase,
22
+ JudgePhase,
23
+ PeerReviewPhase,
24
+ )
25
+ from multi_model_debate.pre_debate import PreDebateProtocol, ProtocolResult
26
+ from multi_model_debate.roles import RoleAssignment, assign_roles, get_critic_pair
27
+ from multi_model_debate.storage.run import (
28
+ PromptHashMismatchError,
29
+ RunContext,
30
+ create_run,
31
+ create_run_from_content,
32
+ find_latest_incomplete_run,
33
+ load_run,
34
+ validate_integrity_hash,
35
+ verify_game_plan_integrity,
36
+ )
37
+
38
+ if TYPE_CHECKING:
39
+ from multi_model_debate.config import Config
40
+
41
+
42
+ class RunStatus(TypedDict):
43
+ """Status information for a run."""
44
+
45
+ run_dir: str
46
+ status: str
47
+ completed_phases: list[str]
48
+ game_plan: str | None
49
+
50
+
51
+ console = Console()
52
+
53
+
54
class Orchestrator:
    """Main orchestrator for the adversarial review workflow.

    Manages the 6-phase review process, handles checkpointing,
    and coordinates model invocations.

    Models are loaded dynamically from config.models.available.
    Roles (strategist, critics, judge) are assigned dynamically based on
    who initiated the debate.
    """

    def __init__(self, config: Config, runs_dir: Path) -> None:
        """Initialize the orchestrator.

        Args:
            config: Configuration settings.
            runs_dir: Directory for storing runs.

        Raises:
            ReviewError: If a role model has no [cli.<name>] config section,
                or a required critic/judge backend could not be loaded.
        """
        self.config = config
        self.runs_dir = runs_dir

        # Dynamic role assignment based on config
        self.roles: RoleAssignment = assign_roles(config)

        # Determine which models to load:
        # - All role models (strategist, critics, judge)
        # - Plus any additional in models.available
        models_to_load: set[str] = set(self.roles.critics)
        models_to_load.add(self.roles.strategist)
        models_to_load.add(self.roles.judge)
        models_to_load.update(config.models.available)

        # Load models dynamically from config
        self.models: dict[str, CLIModelBackend] = {}
        for name in models_to_load:
            try:
                cli_config = config.cli[name]
                self.models[name] = create_cli_backend(
                    name=name,
                    cli_config=cli_config,
                    retry_config=config.retry,
                    min_response_length=config.models.min_response_length,
                    default_timeout=config.models.default_timeout,
                )
            except KeyError as err:
                raise ReviewError(
                    f"No CLI configuration for model '{name}'. Add [cli.{name}] section to config."
                ) from err
        console.print(
            f"[dim]Roles: strategist={self.roles.strategist}, "
            f"critics={self.roles.critics}, judge={self.roles.judge}[/dim]"
        )

        # Get the two critics for debate phases
        self._critic_a_name, self._critic_b_name = get_critic_pair(self.roles)
        critic_a = self.models.get(self._critic_a_name)
        critic_b = self.models.get(self._critic_b_name)
        if critic_a is None:
            raise ReviewError(f"Critic model '{self._critic_a_name}' not available")
        if critic_b is None:
            raise ReviewError(f"Critic model '{self._critic_b_name}' not available")
        self.critic_a: CLIModelBackend = critic_a
        self.critic_b: CLIModelBackend = critic_b

        # Judge model (same family as strategist, isolated instance for judging critics)
        judge = self.models.get(self.roles.judge)
        if judge is None:
            raise ReviewError(f"Judge model '{self.roles.judge}' not available")
        self.judge_model: CLIModelBackend = judge

        # Strategist backend for phases 5 & 6 (fully automated via CLI)
        # Uses the strategist's CLI config, not hardcoded to any specific model.
        # See REQUIREMENTS_V2.md Section 4 for rationale on full automation.
        # NOTE: the strategist name is in models_to_load, so config.cli lookup
        # here cannot raise KeyError (it would have failed in the loop above).
        self.strategist = create_strategist_backend(
            cli_config=config.cli[self.roles.strategist],
            retry_config=config.retry,
            min_response_length=config.models.min_response_length,
            default_timeout=config.models.default_timeout,
        )

    def start(self, game_plan: Path) -> RunContext:
        """Start a new adversarial review.

        Args:
            game_plan: Path to the game plan file.

        Returns:
            RunContext for the new run.

        Raises:
            ReviewError: If game plan doesn't exist.
        """
        if not game_plan.exists():
            raise ReviewError(f"Game plan not found: {game_plan}")

        context = create_run(game_plan, self.runs_dir, self.config)
        console.print("[bold green]Starting new review[/bold green]")
        console.print(f"  Run: {context.run_dir}")
        console.print(f"  Game plan: {game_plan}")
        console.print()

        return context

    def start_from_content(self, content: str) -> RunContext:
        """Start a new adversarial review from content string.

        Args:
            content: The game plan content as a string.

        Returns:
            RunContext for the new run.

        Raises:
            ReviewError: If content is empty.
        """
        if not content or not content.strip():
            raise ReviewError("Game plan content is empty")

        context = create_run_from_content(content, self.runs_dir, self.config)
        console.print("[bold green]Starting new review[/bold green]")
        console.print(f"  Run: {context.run_dir}")
        console.print("  Game plan: (stdin)")
        console.print()

        return context

    def run_pre_debate_protocol(
        self,
        context: RunContext | None = None,
        skip_protocol: bool = False,
    ) -> ProtocolResult | None:
        """Run the pre-debate protocol.

        The protocol injects the current date context so models can
        assess proposal relevance against current technology.

        Args:
            context: Run context for tracking completion state.
            skip_protocol: Skip the entire protocol.

        Returns:
            ProtocolResult if run, None if skipped.
        """
        # Check if already complete (on resume)
        if context is not None and context.is_pre_debate_complete():
            console.print("[dim]Pre-debate protocol already complete[/dim]")
            return None

        # Check if protocol is enabled
        if skip_protocol or not self.config.pre_debate.enabled:
            console.print("[dim]Pre-debate protocol skipped[/dim]")
            # Mark complete even when skipped so resume doesn't re-run
            if context is not None:
                context.mark_pre_debate_complete()
            return None

        # Run the protocol
        protocol = PreDebateProtocol(models=self.models, config=self.config)
        result = protocol.run()

        # Mark complete
        if context is not None:
            context.mark_pre_debate_complete()

        return result

    def resume(self, run_dir: Path | None = None) -> RunContext:
        """Resume an incomplete review.

        Args:
            run_dir: Specific run to resume. If None, finds latest incomplete.

        Returns:
            RunContext for the resumed run.

        Raises:
            ReviewError: If no incomplete run found.
            PromptHashMismatchError: If prompts have changed since debate started.
        """
        if run_dir is None:
            run_dir = find_latest_incomplete_run(self.runs_dir)

        if run_dir is None:
            raise ReviewError("No incomplete run found to resume")

        # CRITICAL: Validate prompts, config, and env vars haven't changed
        # See REQUIREMENTS_V2.md Section 5 - NO "continue anyway" option
        if not validate_integrity_hash(run_dir):
            console.print()
            console.print("[bold red]ERROR: Integrity check failed[/bold red]")
            console.print()
            console.print(
                "[yellow]Prompts, config, or environment have changed "
                "since this debate started.[/yellow]"
            )
            console.print(
                "[yellow]A debate with changed state produces unreliable results.[/yellow]"
            )
            console.print("[yellow]Must restart debate from beginning.[/yellow]")
            console.print()
            raise PromptHashMismatchError(run_dir)

        context = load_run(run_dir, self.config)

        # Verify game plan integrity (warning only, not blocking)
        if not verify_game_plan_integrity(context):
            console.print("[yellow]WARNING: Game plan has changed since run started[/yellow]")

        console.print("[bold green]Resuming review[/bold green]")
        console.print(f"  Run: {context.run_dir}")
        console.print(f"  Completed: {context.completed_phases()}")
        console.print()

        return context

    def execute(self, context: RunContext) -> None:
        """Execute all phases of the review.

        Args:
            context: The run context.

        Raises:
            ReviewError: If any phase fails.
        """
        # Set error log for all CLI backends
        for model in self.models.values():
            model.error_log = context.error_log
        # FIX: the strategist backend is created separately from self.models,
        # so it must be wired to the run's error log explicitly — otherwise
        # CLI failures in phases 5 & 6 silently bypass error logging.
        self.strategist.error_log = context.error_log

        completed = context.completed_phases()

        try:
            # Phase 1: Baseline Critiques (from both critics)
            phase1 = BaselinePhase(
                run_dir=context.run_dir,
                config=self.config,
                critic_a=self.critic_a,
                critic_b=self.critic_b,
                critic_a_name=self._critic_a_name,
                critic_b_name=self._critic_b_name,
            )
            self._run_phase(phase1, context, completed)

            # Phase 2: Critic vs Critic Debate
            phase2 = DebatePhase(
                run_dir=context.run_dir,
                config=self.config,
                critic_a=self.critic_a,
                critic_b=self.critic_b,
                critic_a_name=self._critic_a_name,
                critic_b_name=self._critic_b_name,
            )
            self._run_phase(phase2, context, completed)

            # Phase 3: Winner Determination (by Judge)
            phase3 = JudgePhase(
                run_dir=context.run_dir,
                config=self.config,
                judge=self.judge_model,
                critic_a_name=self._critic_a_name,
                critic_b_name=self._critic_b_name,
            )
            self._run_phase(phase3, context, completed)

            # Phase 4: Peer Review (by winning critic)
            phase4 = PeerReviewPhase(
                run_dir=context.run_dir,
                config=self.config,
                critic_a=self.critic_a,
                critic_b=self.critic_b,
                critic_a_name=self._critic_a_name,
                critic_b_name=self._critic_b_name,
            )
            self._run_phase(phase4, context, completed)

            # Phase 5: Strategist Defense (against winning critic)
            phase5 = DefensePhase(
                run_dir=context.run_dir,
                config=self.config,
                strategist=self.strategist,
                critic_a=self.critic_a,
                critic_b=self.critic_b,
                critic_a_name=self._critic_a_name,
                critic_b_name=self._critic_b_name,
            )
            self._run_phase(phase5, context, completed)

            # Phase 6: Final Position (by Strategist)
            phase6 = FinalPositionPhase(
                run_dir=context.run_dir,
                config=self.config,
                strategist=self.strategist,
            )
            self._run_phase(phase6, context, completed)

            # Mark complete and display Final Position
            context.log_status("COMPLETED")
            phase6.display_final_position()

            # Human notification at the END - this is the only notification
            # per REQUIREMENTS_V2.md Section 4
            self._notify("Final Position ready for your review")

        except Exception as e:
            context.log_status(f"FAILED: {e}")
            self._notify(f"Review failed: {e}")
            raise

    def _run_phase(
        self,
        phase: BaselinePhase
        | DebatePhase
        | JudgePhase
        | PeerReviewPhase
        | DefensePhase
        | FinalPositionPhase,
        context: RunContext,
        completed: set[str],
    ) -> None:
        """Run a single phase with checkpoint handling.

        Args:
            phase: The phase to run.
            context: The run context.
            completed: Set of already-completed phase names.

        Raises:
            ReviewError: If the phase runs but fails to produce valid artifacts.

        Note: Human notifications only happen at the END when Final Position
        is ready, not after each phase. See REQUIREMENTS_V2.md Section 4.
        """
        # Both checks are required: the checkpoint may list the phase while
        # its artifacts were deleted, or artifacts may exist without checkpoint.
        if phase.name in completed and phase.is_complete():
            console.print(f"[dim]Skipping {phase.display_name} (complete)[/dim]")
            return

        console.print(f"[bold]Running {phase.display_name}...[/bold]")
        phase.run()

        if not phase.is_complete():
            raise ReviewError(f"{phase.display_name} failed to produce valid artifacts")

        context.mark_complete(phase.name)
        # NOTE: No notification here - human is only notified at the END
        # when Final Position is ready. See REQUIREMENTS_V2.md Section 4.

    def _notify(self, message: str) -> None:
        """Send a desktop notification if enabled.

        Best-effort: failures are reported to stderr but never raised,
        so a broken notifier cannot abort a review.

        Args:
            message: The notification message.
        """
        if not self.config.notification.enabled:
            return

        import subprocess

        try:
            subprocess.run(
                [self.config.notification.command, "Adversarial Review", message],
                capture_output=True,
                timeout=5,
            )
        except Exception as e:
            print(f"Warning: Notification failed - {e}", file=sys.stderr)

    def status(self) -> RunStatus | None:
        """Get the status of the latest run.

        Returns:
            Status dictionary or None if no runs exist.
        """
        if not self.runs_dir.exists():
            return None

        # Run dirs are named so lexicographic descending order == newest first.
        run_dirs = sorted(
            [d for d in self.runs_dir.iterdir() if d.is_dir()],
            reverse=True,
        )

        if not run_dirs:
            return None

        run_dir = run_dirs[0]
        status_file = run_dir / "status.txt"
        checkpoint_file = run_dir / "checkpoint.txt"
        manifest_file = run_dir / "manifest.json"

        result: RunStatus = {
            "run_dir": str(run_dir),
            "status": "unknown",
            "completed_phases": [],
            "game_plan": None,
        }

        if status_file.exists():
            content = status_file.read_text()
            if "COMPLETED" in content:
                result["status"] = "completed"
            elif "FAILED" in content:
                result["status"] = "failed"
            else:
                result["status"] = "in_progress"

        if checkpoint_file.exists():
            content = checkpoint_file.read_text().strip()
            if content:
                result["completed_phases"] = content.split("\n")

        if manifest_file.exists():
            import json

            manifest = json.loads(manifest_file.read_text())
            result["game_plan"] = manifest.get("game_plan")

        return result
@@ -0,0 +1,22 @@
1
+ """Review phases for the adversarial critique workflow."""
2
+
3
+ from multi_model_debate.phases.base import Phase, PhaseArtifact
4
+ from multi_model_debate.phases.baseline import BaselinePhase
5
+ from multi_model_debate.phases.debate import DebatePhase
6
+ from multi_model_debate.phases.defense import DefensePhase
7
+ from multi_model_debate.phases.final_position import FinalPositionPhase
8
+ from multi_model_debate.phases.judge import JudgePhase
9
+ from multi_model_debate.phases.synthesis import PeerReviewPhase
10
+
11
+ __all__ = [
12
+ # Base
13
+ "Phase",
14
+ "PhaseArtifact",
15
+ # Phases
16
+ "BaselinePhase",
17
+ "DebatePhase",
18
+ "JudgePhase",
19
+ "PeerReviewPhase",
20
+ "DefensePhase",
21
+ "FinalPositionPhase",
22
+ ]