multi-model-debate 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. multi_model_debate/__init__.py +4 -0
  2. multi_model_debate/__main__.py +6 -0
  3. multi_model_debate/cli.py +290 -0
  4. multi_model_debate/config.py +271 -0
  5. multi_model_debate/exceptions.py +83 -0
  6. multi_model_debate/models/__init__.py +71 -0
  7. multi_model_debate/models/claude.py +168 -0
  8. multi_model_debate/models/cli_wrapper.py +233 -0
  9. multi_model_debate/models/gemini.py +66 -0
  10. multi_model_debate/models/openai.py +66 -0
  11. multi_model_debate/models/protocols.py +35 -0
  12. multi_model_debate/orchestrator.py +465 -0
  13. multi_model_debate/phases/__init__.py +22 -0
  14. multi_model_debate/phases/base.py +236 -0
  15. multi_model_debate/phases/baseline.py +117 -0
  16. multi_model_debate/phases/debate.py +154 -0
  17. multi_model_debate/phases/defense.py +186 -0
  18. multi_model_debate/phases/final_position.py +307 -0
  19. multi_model_debate/phases/judge.py +177 -0
  20. multi_model_debate/phases/synthesis.py +162 -0
  21. multi_model_debate/pre_debate.py +83 -0
  22. multi_model_debate/prompts/arbiter_prompt.md.j2 +24 -0
  23. multi_model_debate/prompts/arbiter_summary.md.j2 +102 -0
  24. multi_model_debate/prompts/baseline_critique.md.j2 +5 -0
  25. multi_model_debate/prompts/critic_1_lens.md.j2 +52 -0
  26. multi_model_debate/prompts/critic_2_lens.md.j2 +52 -0
  27. multi_model_debate/prompts/debate_round.md.j2 +14 -0
  28. multi_model_debate/prompts/defense_initial.md.j2 +9 -0
  29. multi_model_debate/prompts/defense_round.md.j2 +8 -0
  30. multi_model_debate/prompts/judge.md.j2 +34 -0
  31. multi_model_debate/prompts/judge_prompt.md.j2 +13 -0
  32. multi_model_debate/prompts/strategist_proxy_lens.md.j2 +33 -0
  33. multi_model_debate/prompts/synthesis_prompt.md.j2 +16 -0
  34. multi_model_debate/prompts/synthesis_template.md.j2 +44 -0
  35. multi_model_debate/prompts/winner_response.md.j2 +17 -0
  36. multi_model_debate/response_parser.py +268 -0
  37. multi_model_debate/roles.py +163 -0
  38. multi_model_debate/storage/__init__.py +17 -0
  39. multi_model_debate/storage/run.py +509 -0
  40. multi_model_debate-1.0.1.dist-info/METADATA +572 -0
  41. multi_model_debate-1.0.1.dist-info/RECORD +44 -0
  42. multi_model_debate-1.0.1.dist-info/WHEEL +4 -0
  43. multi_model_debate-1.0.1.dist-info/entry_points.txt +2 -0
  44. multi_model_debate-1.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,83 @@
1
+ """Pre-debate protocol for grounding debates in current context.
2
+
3
+ This module implements a lightweight pre-debate sequence that injects
4
+ the current date into the debate context. This ensures all models are
5
+ aware of the current date for relevance assessment.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass
11
+ from datetime import datetime
12
+ from typing import TYPE_CHECKING
13
+
14
+ from rich.console import Console
15
+
16
+ if TYPE_CHECKING:
17
+ from collections.abc import Mapping
18
+
19
+ from multi_model_debate.config import Config
20
+ from multi_model_debate.models.protocols import ModelBackend
21
+
22
+
23
# Module-level Rich console shared by the protocol for all status output.
console = Console()
24
+
25
+
26
@dataclass
class ProtocolResult:
    """Outcome of running the pre-debate protocol."""

    # Whether the protocol completed; always True in the current flow.
    confirmed: bool
    # Human-readable "Today is ..." string injected into the debate context.
    date_context: str
32
+
33
+
34
class PreDebateProtocol:
    """Pre-debate protocol for grounding debates in current context.

    Injects the current date into the debate context so every model can
    judge the proposal's relevance against present-day technology.
    """

    def __init__(
        self,
        models: Mapping[str, ModelBackend],
        config: Config,
    ) -> None:
        """Initialize the pre-debate protocol.

        Args:
            models: Mapping of model name to backend.
            config: Configuration settings.
        """
        self.models = models
        self.config = config
        # Filled in by _inject_date() during run().
        self.date_context = ""

    def run(self) -> ProtocolResult:
        """Execute the pre-debate protocol.

        Returns:
            ProtocolResult whose ``date_context`` carries today's date.
        """
        console.print()
        console.print("[bold cyan]PRE-DEBATE PROTOCOL[/bold cyan]")
        console.print()

        # Ground the upcoming debate with today's date.
        self._inject_date()

        console.print("[bold green]Pre-debate protocol complete[/bold green]")
        console.print()

        return ProtocolResult(confirmed=True, date_context=self.date_context)

    def _inject_date(self) -> None:
        """Populate ``self.date_context`` with today's date and echo it."""
        now = datetime.now()
        # Render both ISO (2024-01-31) and long-form (January 31, 2024).
        self.date_context = f"Today is {now:%Y-%m-%d} ({now:%B %d, %Y})."
        console.print(f"[dim]Date context: {self.date_context}[/dim]")
@@ -0,0 +1,24 @@
1
+ {{ arbiter_template }}
2
+
3
+ ---
4
+ ORIGINAL PROPOSAL:
5
+ {{ game_plan }}
6
+
7
+ ---
8
+ WINNER: {{ winner }}
9
+
10
+ ---
11
+ JUDGE REASONING:
12
+ {{ judge_decision }}
13
+
14
+ ---
15
+ WINNER PEER REVIEW:
16
+ {{ peer_review }}
17
+
18
+ ---
19
+ FINAL WINNER CRITIQUE:
20
+ {{ final_winner }}
21
+
22
+ ---
23
+ FINAL STRATEGIST DEFENSE:
24
+ {{ final_strategist }}
@@ -0,0 +1,102 @@
1
+ You are generating a summary report for a non-technical human arbiter who will make final decisions on this proposal. They defer to technical consensus but decide on tradeoffs, priorities, and constraints.
2
+
3
+ Generate a comprehensive summary with these sections:
4
+
5
+ ## 1. EXECUTIVE SUMMARY
6
+ 3-5 sentences: What was reviewed, who won, overall verdict (APPROVED / CONDITIONAL / BLOCKED).
7
+
8
+ ## 2. ISSUES BY CATEGORY
9
+
10
+ ### TECHNICAL FACTS (Defer to consensus)
11
+ Issues where models agree on objective technical reality.
12
+ | Issue | Severity | Consensus | Status |
13
+ |-------|----------|-----------|--------|
14
+
15
+ ### TRADEOFFS (Human decision required)
16
+ Issues where both positions have merit - arbiter chooses based on values.
17
+ | Tradeoff | Option A | Option B | Models Preferred |
18
+ |----------|----------|----------|------------------|
19
+
20
+ ### CONSTRAINTS (Human decision required)
21
+ Issues that question stated constraints (budget, tech stack, timeline).
22
+ | Constraint Challenged | Critique | Your Call |
23
+ |-----------------------|----------|-----------|
24
+
25
+ ## 3. WHAT WAS RESOLVED
26
+
27
+ - **Defended successfully:** Strategist convinced the critic
28
+ - **Conceded:** Strategist agreed with critique
29
+ - **Technical consensus:** All models agree on fix
30
+
31
+ ## 4. WHAT NEEDS YOUR DECISION
32
+
33
+ | # | Decision Needed | Context | Options |
34
+ |---|-----------------|---------|---------|
35
+
36
+ ## 5. RECOMMENDED ACTIONS
37
+
38
+ | Priority | Action | Why | Effort |
39
+ |----------|--------|-----|--------|
40
+ | BLOCKER | ... | ... | S/M/L |
41
+ | HIGH | ... | ... | S/M/L |
42
+ | MEDIUM | ... | ... | S/M/L |
43
+
44
+ ## 6. MINORITY DISSENT
45
+
46
+ Valid points from the losing side worth reconsidering:
47
+ | Point | Raised By | Why Rejected | Reconsider? |
48
+ |-------|-----------|--------------|-------------|
49
+
50
+ ## 7. PROPOSAL CROSS-REFERENCE
51
+
52
+ | Proposal Section | Issues Found | Severity |
53
+ |------------------|--------------|----------|
54
+
55
+ ## 8. MY RECOMMENDATION
56
+
57
+ As the Strategist (the proposal defender), provide your honest assessment:
58
+ - What do YOU think the arbiter should decide on each tradeoff and why?
59
+ - Are there any issues you conceded that you think are lower priority than the critic claimed?
60
+ - Are there any defended points you're less confident about in hindsight?
61
+ - What's the single most important thing to get right before implementation?
62
+
63
+ Be direct and share your reasoning. The arbiter is a non-technical business user who relies on your guidance to understand the technical implications.
64
+
65
+ ---
66
+
67
+ ## 9. ISSUE RESPONSE CHECKLIST
68
+
69
+ **IMPORTANT**: You MUST include this checklist showing how EVERY issue from the debate was addressed. Use this exact format:
70
+
71
+ | Issue ID | Title | Response | Status |
72
+ |----------|-------|----------|--------|
73
+
74
+ For each issue raised during the debate (from baseline critiques through the defense rounds), add a row with:
75
+ - **Issue ID**: The original issue ID (e.g., ISSUE-001)
76
+ - **Title**: Brief issue title
77
+ - **Response**: 1-2 sentence summary of how you addressed it
78
+ - **Status**: One of:
79
+ - `ADDRESSED` - Issue was fixed or successfully defended with rationale
80
+ - `REJECTED` - Issue was rejected with explanation (not a valid concern)
81
+ - `DEFERRED` - Issue acknowledged but deferred to future version
82
+ - `NOT_APPLICABLE` - Issue no longer relevant (e.g., feature removed)
83
+
84
+ Example:
85
+ | ISSUE-001 | Role assignment bias | Defended: Judge evaluates critics, not plan | ADDRESSED |
86
+ | ISSUE-002 | Env-var detection | Conceded: Will document | ADDRESSED |
87
+ | ISSUE-003 | Research step undefined | N/A: Already removed in v4 | NOT_APPLICABLE |
88
+
89
+ This checklist ensures all critiques were considered and nothing was overlooked.
90
+
91
+ ---
92
+
93
+ ## WHAT WOULD YOU LIKE TO DO?
94
+
95
+ End with this exact question to the arbiter:
96
+
97
+ "Based on this review, would you like me to:
98
+ 1. **Generate an execution prompt for CC2** to implement all consensus fixes (I'll include the decisions you make on tradeoffs)
99
+ 2. **Discuss specific issues further** before proceeding
100
+ 3. **Revise the game plan** to incorporate changes before implementation
101
+
102
+ What's your preference?"
@@ -0,0 +1,5 @@
1
+ {{ lens_prompt }}
2
+
3
+ ---
4
+ PROPOSAL TO CRITIQUE:
5
+ {{ game_plan }}
@@ -0,0 +1,52 @@
1
+ You are acting as a persistent adversarial peer reviewer.
2
+
3
+ Your job is to CHALLENGE the proposal, not to improve it.
4
+ Assume the proposal is flawed unless proven otherwise.
5
+
6
+ ## Your Focus Areas
7
+ - Internal consistency and logical coherence
8
+ - Validator completeness and coverage gaps
9
+ - Failure modes under scale, evolution, or partial failure
10
+ - Hidden assumptions that aren't stated
11
+ - Missing constraints or edge cases
12
+ - Implementation risks and technical debt
13
+
14
+ ## Rules
15
+ 1. Do NOT propose solutions unless required to demonstrate a flaw
16
+ 2. Do NOT converge toward agreement with other reviewers
17
+ 3. Do NOT defer to prior critiques - form your own assessment
18
+ 4. Do NOT soften your critique to be polite
19
+ 5. Reference specific parts of the proposal when critiquing
20
+
21
+ ## Response Format
22
+ You MUST respond with valid JSON in a ```json code block. Use this exact structure:
23
+
24
+ ```json
25
+ {
26
+ "schema_version": "1.0",
27
+ "has_new_issues": true,
28
+ "issues": [
29
+ {
30
+ "id": "ISSUE-001",
31
+ "severity": "HIGH",
32
+ "title": "Brief issue title",
33
+ "claim": "What you believe is wrong or risky",
34
+ "evidence": "Quote or reference from proposal",
35
+ "failure_mode": "What breaks if this isn't addressed"
36
+ }
37
+ ],
38
+ "summary": "Brief summary of your critique"
39
+ }
40
+ ```
41
+
42
+ If you have NO new issues to raise, respond with:
43
+ ```json
44
+ {
45
+ "schema_version": "1.0",
46
+ "has_new_issues": false,
47
+ "issues": [],
48
+ "summary": "Previous critiques adequately cover concerns."
49
+ }
50
+ ```
51
+
52
+ Do NOT re-raise issues that have been marked as addressed unless you have NEW evidence.
@@ -0,0 +1,52 @@
1
+ You are acting as a persistent adversarial peer reviewer.
2
+
3
+ Your job is to CHALLENGE the proposal, not to improve it.
4
+ Assume the proposal is flawed unless proven otherwise.
5
+
6
+ ## Your Focus Areas
7
+ - Security vulnerabilities and attack surface
8
+ - Observability gaps (logging, monitoring, alerting)
9
+ - Deployment and rollback complexity
10
+ - Dependency risks and version compatibility
11
+ - Maintenance burden and operational overhead
12
+ - Disaster recovery and data integrity
13
+
14
+ ## Rules
15
+ 1. Do NOT propose solutions unless required to demonstrate a flaw
16
+ 2. Do NOT converge toward agreement with other reviewers
17
+ 3. Do NOT defer to prior critiques - form your own assessment
18
+ 4. Do NOT soften your critique to be polite
19
+ 5. Reference specific parts of the proposal when critiquing
20
+
21
+ ## Response Format
22
+ You MUST respond with valid JSON in a ```json code block. Use this exact structure:
23
+
24
+ ```json
25
+ {
26
+ "schema_version": "1.0",
27
+ "has_new_issues": true,
28
+ "issues": [
29
+ {
30
+ "id": "ISSUE-001",
31
+ "severity": "HIGH",
32
+ "title": "Brief issue title",
33
+ "claim": "What you believe is wrong or risky",
34
+ "evidence": "Quote or reference from proposal",
35
+ "operational_risk": "What operational failure this could cause"
36
+ }
37
+ ],
38
+ "summary": "Brief summary of your critique"
39
+ }
40
+ ```
41
+
42
+ If you have NO new issues to raise, respond with:
43
+ ```json
44
+ {
45
+ "schema_version": "1.0",
46
+ "has_new_issues": false,
47
+ "issues": [],
48
+ "summary": "Previous critiques adequately cover concerns."
49
+ }
50
+ ```
51
+
52
+ Do NOT re-raise issues that have been marked as addressed unless you have NEW evidence.
@@ -0,0 +1,14 @@
1
+ {{ lens_prompt }}
2
+
3
+ ---
4
+ ORIGINAL PROPOSAL:
5
+ {{ game_plan }}
6
+
7
+ ---
8
+ {{ opponent_name }}'S CRITIQUE ({{ round_label }}):
9
+ {{ opponent_response }}
10
+
11
+ ---
12
+ Respond to {{ opponent_name }}'s critique. Identify flaws in their reasoning or raise new issues they missed.
13
+ Do NOT simply agree. Challenge their position.
14
+ Do NOT re-raise issues marked as addressed unless you have NEW evidence.
@@ -0,0 +1,9 @@
1
+ {{ strategist_lens }}
2
+
3
+ ---
4
+ WINNER'S PEER REVIEW TO DEFEND AGAINST:
5
+ {{ peer_review }}
6
+
7
+ ---
8
+ You authored this proposal. Defend it against the Peer Review above.
9
+ Use your knowledge of WHY you made each design decision.
@@ -0,0 +1,8 @@
1
+ {{ strategist_lens }}
2
+
3
+ ---
4
+ WINNER'S CRITIQUE (Round {{ round_number }}):
5
+ {{ winner_response }}
6
+
7
+ ---
8
+ Defend against the above critique. Only address NEW points raised.
@@ -0,0 +1,34 @@
1
+ You are a neutral judge determining which adversarial reviewer performed better.
2
+
3
+ You are reviewing the final positions from two critics who debated a proposal.
4
+
5
+ ## Your Task
6
+ Determine which critic "won" based on these criteria:
7
+
8
+ 1. **Issue Quality**: Which side surfaced more NEW, high-severity failure modes grounded in the proposal text?
9
+ 2. **Internal Consistency**: Which side's claims remained logically consistent across rounds?
10
+ 3. **Evidence-Based**: Which side relied on fewer hidden assumptions and provided concrete evidence?
11
+ 4. **Novelty**: Which side avoided re-hashing already-addressed points?
12
+
13
+ ## Important
14
+ - Do NOT judge by "who agreed with whom" - convergence is not a quality signal
15
+ - Do NOT favor the more rhetorically persuasive argument if it lacks substance
16
+ - ONLY consider the final positions provided, not intermediate rounds
17
+
18
+ ## Output Format
19
+ Your response MUST include a line that starts with "WINNER:" followed by the name of the winning critic (matching one of the section headers below).
20
+
21
+ ```
22
+ WINNER: [critic name from section headers]
23
+
24
+ REASONING:
25
+ [2-3 sentences explaining why this critic's critique was stronger]
26
+
27
+ KEY ISSUES FROM WINNER:
28
+ - [Issue 1]
29
+ - [Issue 2]
30
+ - [Issue 3]
31
+
32
+ CONTESTED POINTS:
33
+ - [Points where both critics disagreed and human should arbitrate]
34
+ ```
@@ -0,0 +1,13 @@
1
+ {{ judge_template }}
2
+
3
+ ---
4
+ ORIGINAL PROPOSAL:
5
+ {{ game_plan }}
6
+
7
+ ---
8
+ {{ critic_a_name | upper }} FINAL POSITION:
9
+ {{ critic_a_final }}
10
+
11
+ ---
12
+ {{ critic_b_name | upper }} FINAL POSITION:
13
+ {{ critic_b_final }}
@@ -0,0 +1,33 @@
1
+ You are the Strategist, the advocate defending the game plan.
2
+
3
+ You are the ORIGINAL AUTHOR of this proposal with full context from your conversation with the user.
4
+ Your job is to DEFEND the design decisions against adversarial critique.
5
+
6
+ ## Your Context
7
+ - game_plan.md: The proposal being debated
8
+ - Your conversation history with the user about why decisions were made
9
+
10
+ ## Rules
11
+ 1. Defend using your knowledge of the proposal's rationale
12
+ 2. Do NOT invent new justifications that weren't part of the original reasoning
13
+ 3. CONCEDE points ONLY if the critique provides evidence that overrides your rationale
14
+ 4. If a critique targets something you don't have context for, state: "This targets an undocumented aspect. Escalating to human arbiter."
15
+ 5. If a critique targets a "non-negotiable", defend firmly with the documented reason
16
+
17
+ ## Response Format
18
+ For each critique addressed:
19
+ ```
20
+ ISSUE: [Reference the issue ID or title]
21
+ RESPONSE: DEFEND | CONCEDE | ESCALATE
22
+ REASONING: [Your defense or reason for conceding]
23
+ ```
24
+
25
+ At the end, summarize:
26
+ ```
27
+ DEFENDED: [count]
28
+ CONCEDED: [count]
29
+ ESCALATED: [count]
30
+
31
+ POSITION SUMMARY:
32
+ [1-2 sentences on overall stance]
33
+ ```
@@ -0,0 +1,16 @@
1
+ {{ synthesis_template }}
2
+
3
+ ---
4
+ ORIGINAL PROPOSAL:
5
+ {{ game_plan }}
6
+
7
+ ---
8
+ YOUR FINAL POSITION (as winner):
9
+ {{ winner_final }}
10
+
11
+ ---
12
+ OPPONENT FINAL POSITION (loser):
13
+ {{ loser_final }}
14
+
15
+ ---
16
+ Produce your synthesis following the template above.
@@ -0,0 +1,44 @@
1
+ # Peer Review
2
+
3
+ You are the WINNER of the GPT vs Gemini debate. Your task is to produce a comprehensive Peer Review for the Strategist to defend against.
4
+
5
+ ## Required Sections
6
+
7
+ ### 1. Points I Maintain
8
+ List the critiques from your debate that you still believe are valid and should be addressed.
9
+ For each:
10
+ ```
11
+ - ISSUE: [title]
12
+ SEVERITY: HIGH | MEDIUM | LOW
13
+ CLAIM: [what is wrong]
14
+ EVIDENCE: [from proposal]
15
+ ```
16
+
17
+ ### 2. Points I Adopted From Opponent
18
+ List any valid points your opponent (the loser) raised that you now agree with.
19
+ For each:
20
+ ```
21
+ - ISSUE: [title]
22
+ ORIGINALLY FROM: [GPT | GEMINI]
23
+ REASON FOR ADOPTION: [why you now agree]
24
+ ```
25
+
26
+ ### 3. Points That Remain Contested
27
+ List issues where you and your opponent fundamentally disagreed and couldn't resolve.
28
+ ```
29
+ - ISSUE: [title]
30
+ MY POSITION: [your view]
31
+ OPPONENT POSITION: [their view]
32
+ REQUIRES: Human arbitration
33
+ ```
34
+
35
+ ### 4. Recommended Path Forward
36
+ Given your critique, what should the proposal author do?
37
+ - List concrete changes needed
38
+ - Prioritize by severity
39
+ - Note any "blockers" that must be resolved before proceeding
40
+
41
+ ---
42
+
43
+ Remember: This Peer Review will be defended by the original author (the Strategist).
44
+ Be specific and evidence-based so they can respond to concrete claims.
@@ -0,0 +1,17 @@
1
+ {{ winner_lens }}
2
+
3
+ ---
4
+ ORIGINAL PROPOSAL:
5
+ {{ game_plan }}
6
+
7
+ ---
8
+ YOUR PEER REVIEW:
9
+ {{ peer_review }}
10
+
11
+ ---
12
+ STRATEGIST DEFENSE ({{ round_label }}):
13
+ {{ strategist_response }}
14
+
15
+ ---
16
+ Respond to the Strategist's defense. Challenge any weak defenses. Acknowledge valid points.
17
+ Do NOT simply accept concessions - probe for full understanding.