multi-model-debate 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. multi_model_debate/__init__.py +4 -0
  2. multi_model_debate/__main__.py +6 -0
  3. multi_model_debate/cli.py +290 -0
  4. multi_model_debate/config.py +271 -0
  5. multi_model_debate/exceptions.py +83 -0
  6. multi_model_debate/models/__init__.py +71 -0
  7. multi_model_debate/models/claude.py +168 -0
  8. multi_model_debate/models/cli_wrapper.py +233 -0
  9. multi_model_debate/models/gemini.py +66 -0
  10. multi_model_debate/models/openai.py +66 -0
  11. multi_model_debate/models/protocols.py +35 -0
  12. multi_model_debate/orchestrator.py +465 -0
  13. multi_model_debate/phases/__init__.py +22 -0
  14. multi_model_debate/phases/base.py +236 -0
  15. multi_model_debate/phases/baseline.py +117 -0
  16. multi_model_debate/phases/debate.py +154 -0
  17. multi_model_debate/phases/defense.py +186 -0
  18. multi_model_debate/phases/final_position.py +307 -0
  19. multi_model_debate/phases/judge.py +177 -0
  20. multi_model_debate/phases/synthesis.py +162 -0
  21. multi_model_debate/pre_debate.py +83 -0
  22. multi_model_debate/prompts/arbiter_prompt.md.j2 +24 -0
  23. multi_model_debate/prompts/arbiter_summary.md.j2 +102 -0
  24. multi_model_debate/prompts/baseline_critique.md.j2 +5 -0
  25. multi_model_debate/prompts/critic_1_lens.md.j2 +52 -0
  26. multi_model_debate/prompts/critic_2_lens.md.j2 +52 -0
  27. multi_model_debate/prompts/debate_round.md.j2 +14 -0
  28. multi_model_debate/prompts/defense_initial.md.j2 +9 -0
  29. multi_model_debate/prompts/defense_round.md.j2 +8 -0
  30. multi_model_debate/prompts/judge.md.j2 +34 -0
  31. multi_model_debate/prompts/judge_prompt.md.j2 +13 -0
  32. multi_model_debate/prompts/strategist_proxy_lens.md.j2 +33 -0
  33. multi_model_debate/prompts/synthesis_prompt.md.j2 +16 -0
  34. multi_model_debate/prompts/synthesis_template.md.j2 +44 -0
  35. multi_model_debate/prompts/winner_response.md.j2 +17 -0
  36. multi_model_debate/response_parser.py +268 -0
  37. multi_model_debate/roles.py +163 -0
  38. multi_model_debate/storage/__init__.py +17 -0
  39. multi_model_debate/storage/run.py +509 -0
  40. multi_model_debate-1.0.1.dist-info/METADATA +572 -0
  41. multi_model_debate-1.0.1.dist-info/RECORD +44 -0
  42. multi_model_debate-1.0.1.dist-info/WHEEL +4 -0
  43. multi_model_debate-1.0.1.dist-info/entry_points.txt +2 -0
  44. multi_model_debate-1.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,83 @@
1
+ """Pre-debate protocol for grounding debates in current context.
2
+
3
+ This module implements a lightweight pre-debate sequence that injects
4
+ the current date into the debate context. This ensures all models are
5
+ aware of the current date for relevance assessment.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass
11
+ from datetime import datetime
12
+ from typing import TYPE_CHECKING
13
+
14
+ from rich.console import Console
15
+
16
+ if TYPE_CHECKING:
17
+ from collections.abc import Mapping
18
+
19
+ from multi_model_debate.config import Config
20
+ from multi_model_debate.models.protocols import ModelBackend
21
+
22
+
23
# Module-level Rich console shared by the protocol for all status output.
console = Console()
24
+
25
+
26
@dataclass
class ProtocolResult:
    """Outcome of running the pre-debate protocol."""

    # Whether the protocol completed; always True in the current flow.
    confirmed: bool
    # Human-readable "Today is ..." string injected into the debate context.
    date_context: str
32
+
33
+
34
class PreDebateProtocol:
    """Pre-debate protocol for grounding debates in current context.

    Injects the current date into the debate context so every model can
    judge the proposal's relevance against present-day technology.
    """

    def __init__(
        self,
        models: Mapping[str, ModelBackend],
        config: Config,
    ) -> None:
        """Initialize the pre-debate protocol.

        Args:
            models: Mapping of model name to backend.
            config: Configuration settings.
        """
        self.models = models
        self.config = config
        # Filled in by _inject_date() during run().
        self.date_context = ""

    def run(self) -> ProtocolResult:
        """Execute the pre-debate protocol.

        Returns:
            ProtocolResult whose ``date_context`` carries today's date.
        """
        console.print()
        console.print("[bold cyan]PRE-DEBATE PROTOCOL[/bold cyan]")
        console.print()

        # Ground the upcoming debate with today's date.
        self._inject_date()

        console.print("[bold green]Pre-debate protocol complete[/bold green]")
        console.print()

        return ProtocolResult(confirmed=True, date_context=self.date_context)

    def _inject_date(self) -> None:
        """Populate ``self.date_context`` with today's date and echo it."""
        now = datetime.now()
        # Render both ISO (2024-01-31) and long-form (January 31, 2024).
        self.date_context = f"Today is {now:%Y-%m-%d} ({now:%B %d, %Y})."
        console.print(f"[dim]Date context: {self.date_context}[/dim]")
@@ -0,0 +1,24 @@
1
+ {{ arbiter_template }}
2
+
3
+ ---
4
+ ORIGINAL PROPOSAL:
5
+ {{ game_plan }}
6
+
7
+ ---
8
+ WINNER: {{ winner }}
9
+
10
+ ---
11
+ JUDGE REASONING:
12
+ {{ judge_decision }}
13
+
14
+ ---
15
+ WINNER PEER REVIEW:
16
+ {{ peer_review }}
17
+
18
+ ---
19
+ FINAL WINNER CRITIQUE:
20
+ {{ final_winner }}
21
+
22
+ ---
23
+ FINAL STRATEGIST DEFENSE:
24
+ {{ final_strategist }}
@@ -0,0 +1,102 @@
1
+ You are generating a summary report for a non-technical human arbiter who will make final decisions on this proposal. They defer to technical consensus but decide on tradeoffs, priorities, and constraints.
2
+
3
+ Generate a comprehensive summary with these sections:
4
+
5
+ ## 1. EXECUTIVE SUMMARY
6
+ 3-5 sentences: What was reviewed, who won, overall verdict (APPROVED / CONDITIONAL / BLOCKED).
7
+
8
+ ## 2. ISSUES BY CATEGORY
9
+
10
+ ### TECHNICAL FACTS (Defer to consensus)
11
+ Issues where models agree on objective technical reality.
12
+ | Issue | Severity | Consensus | Status |
13
+ |-------|----------|-----------|--------|
14
+
15
+ ### TRADEOFFS (Human decision required)
16
+ Issues where both positions have merit - arbiter chooses based on values.
17
+ | Tradeoff | Option A | Option B | Models Preferred |
18
+ |----------|----------|----------|------------------|
19
+
20
+ ### CONSTRAINTS (Human decision required)
21
+ Issues that question stated constraints (budget, tech stack, timeline).
22
+ | Constraint Challenged | Critique | Your Call |
23
+ |-----------------------|----------|-----------|
24
+
25
+ ## 3. WHAT WAS RESOLVED
26
+
27
+ - **Defended successfully:** Strategist convinced the critic
28
+ - **Conceded:** Strategist agreed with critique
29
+ - **Technical consensus:** All models agree on fix
30
+
31
+ ## 4. WHAT NEEDS YOUR DECISION
32
+
33
+ | # | Decision Needed | Context | Options |
34
+ |---|-----------------|---------|---------|
35
+
36
+ ## 5. RECOMMENDED ACTIONS
37
+
38
+ | Priority | Action | Why | Effort |
39
+ |----------|--------|-----|--------|
40
+ | BLOCKER | ... | ... | S/M/L |
41
+ | HIGH | ... | ... | S/M/L |
42
+ | MEDIUM | ... | ... | S/M/L |
43
+
44
+ ## 6. MINORITY DISSENT
45
+
46
+ Valid points from the losing side worth reconsidering:
47
+ | Point | Raised By | Why Rejected | Reconsider? |
48
+ |-------|-----------|--------------|-------------|
49
+
50
+ ## 7. PROPOSAL CROSS-REFERENCE
51
+
52
+ | Proposal Section | Issues Found | Severity |
53
+ |------------------|--------------|----------|
54
+
55
+ ## 8. MY RECOMMENDATION
56
+
57
+ As the Strategist (the proposal defender), provide your honest assessment:
58
+ - What do YOU think the arbiter should decide on each tradeoff and why?
59
+ - Are there any issues you conceded that you think are lower priority than the critic claimed?
60
+ - Are there any defended points you're less confident about in hindsight?
61
+ - What's the single most important thing to get right before implementation?
62
+
63
+ Be direct and share your reasoning. The arbiter is a non-technical business user who relies on your guidance to understand the technical implications.
64
+
65
+ ---
66
+
67
+ ## 9. ISSUE RESPONSE CHECKLIST
68
+
69
+ **IMPORTANT**: You MUST include this checklist showing how EVERY issue from the debate was addressed. Use this exact format:
70
+
71
+ | Issue ID | Title | Response | Status |
72
+ |----------|-------|----------|--------|
73
+
74
+ For each issue raised during the debate (from baseline critiques through the defense rounds), add a row with:
75
+ - **Issue ID**: The original issue ID (e.g., ISSUE-001)
76
+ - **Title**: Brief issue title
77
+ - **Response**: 1-2 sentence summary of how you addressed it
78
+ - **Status**: One of:
79
+ - `ADDRESSED` - Issue was fixed or successfully defended with rationale
80
+ - `REJECTED` - Issue was rejected with explanation (not a valid concern)
81
+ - `DEFERRED` - Issue acknowledged but deferred to future version
82
+ - `NOT_APPLICABLE` - Issue no longer relevant (e.g., feature removed)
83
+
84
+ Example:
85
+ | ISSUE-001 | Role assignment bias | Defended: Judge evaluates critics, not plan | ADDRESSED |
86
+ | ISSUE-002 | Env-var detection | Conceded: Will document | ADDRESSED |
87
+ | ISSUE-003 | Research step undefined | N/A: Already removed in v4 | NOT_APPLICABLE |
88
+
89
+ This checklist ensures all critiques were considered and nothing was overlooked.
90
+
91
+ ---
92
+
93
+ ## WHAT WOULD YOU LIKE TO DO?
94
+
95
+ End with this exact question to the arbiter:
96
+
97
+ "Based on this review, would you like me to:
98
+ 1. **Generate an execution prompt for CC2** to implement all consensus fixes (I'll include the decisions you make on tradeoffs)
99
+ 2. **Discuss specific issues further** before proceeding
100
+ 3. **Revise the game plan** to incorporate changes before implementation
101
+
102
+ What's your preference?"
@@ -0,0 +1,5 @@
1
+ {{ lens_prompt }}
2
+
3
+ ---
4
+ PROPOSAL TO CRITIQUE:
5
+ {{ game_plan }}
@@ -0,0 +1,52 @@
1
+ You are acting as a persistent adversarial peer reviewer.
2
+
3
+ Your job is to CHALLENGE the proposal, not to improve it.
4
+ Assume the proposal is flawed unless proven otherwise.
5
+
6
+ ## Your Focus Areas
7
+ - Internal consistency and logical coherence
8
+ - Validator completeness and coverage gaps
9
+ - Failure modes under scale, evolution, or partial failure
10
+ - Hidden assumptions that aren't stated
11
+ - Missing constraints or edge cases
12
+ - Implementation risks and technical debt
13
+
14
+ ## Rules
15
+ 1. Do NOT propose solutions unless required to demonstrate a flaw
16
+ 2. Do NOT converge toward agreement with other reviewers
17
+ 3. Do NOT defer to prior critiques - form your own assessment
18
+ 4. Do NOT soften your critique to be polite
19
+ 5. Reference specific parts of the proposal when critiquing
20
+
21
+ ## Response Format
22
+ You MUST respond with valid JSON in a ```json code block. Use this exact structure:
23
+
24
+ ```json
25
+ {
26
+ "schema_version": "1.0",
27
+ "has_new_issues": true,
28
+ "issues": [
29
+ {
30
+ "id": "ISSUE-001",
31
+ "severity": "HIGH",
32
+ "title": "Brief issue title",
33
+ "claim": "What you believe is wrong or risky",
34
+ "evidence": "Quote or reference from proposal",
35
+ "failure_mode": "What breaks if this isn't addressed"
36
+ }
37
+ ],
38
+ "summary": "Brief summary of your critique"
39
+ }
40
+ ```
41
+
42
+ If you have NO new issues to raise, respond with:
43
+ ```json
44
+ {
45
+ "schema_version": "1.0",
46
+ "has_new_issues": false,
47
+ "issues": [],
48
+ "summary": "Previous critiques adequately cover concerns."
49
+ }
50
+ ```
51
+
52
+ Do NOT re-raise issues that have been marked as addressed unless you have NEW evidence.
@@ -0,0 +1,52 @@
1
+ You are acting as a persistent adversarial peer reviewer.
2
+
3
+ Your job is to CHALLENGE the proposal, not to improve it.
4
+ Assume the proposal is flawed unless proven otherwise.
5
+
6
+ ## Your Focus Areas
7
+ - Security vulnerabilities and attack surface
8
+ - Observability gaps (logging, monitoring, alerting)
9
+ - Deployment and rollback complexity
10
+ - Dependency risks and version compatibility
11
+ - Maintenance burden and operational overhead
12
+ - Disaster recovery and data integrity
13
+
14
+ ## Rules
15
+ 1. Do NOT propose solutions unless required to demonstrate a flaw
16
+ 2. Do NOT converge toward agreement with other reviewers
17
+ 3. Do NOT defer to prior critiques - form your own assessment
18
+ 4. Do NOT soften your critique to be polite
19
+ 5. Reference specific parts of the proposal when critiquing
20
+
21
+ ## Response Format
22
+ You MUST respond with valid JSON in a ```json code block. Use this exact structure:
23
+
24
+ ```json
25
+ {
26
+ "schema_version": "1.0",
27
+ "has_new_issues": true,
28
+ "issues": [
29
+ {
30
+ "id": "ISSUE-001",
31
+ "severity": "HIGH",
32
+ "title": "Brief issue title",
33
+ "claim": "What you believe is wrong or risky",
34
+ "evidence": "Quote or reference from proposal",
35
+ "operational_risk": "What operational failure this could cause"
36
+ }
37
+ ],
38
+ "summary": "Brief summary of your critique"
39
+ }
40
+ ```
41
+
42
+ If you have NO new issues to raise, respond with:
43
+ ```json
44
+ {
45
+ "schema_version": "1.0",
46
+ "has_new_issues": false,
47
+ "issues": [],
48
+ "summary": "Previous critiques adequately cover concerns."
49
+ }
50
+ ```
51
+
52
+ Do NOT re-raise issues that have been marked as addressed unless you have NEW evidence.
@@ -0,0 +1,14 @@
1
+ {{ lens_prompt }}
2
+
3
+ ---
4
+ ORIGINAL PROPOSAL:
5
+ {{ game_plan }}
6
+
7
+ ---
8
+ {{ opponent_name }}'S CRITIQUE ({{ round_label }}):
9
+ {{ opponent_response }}
10
+
11
+ ---
12
+ Respond to {{ opponent_name }}'s critique. Identify flaws in their reasoning or raise new issues they missed.
13
+ Do NOT simply agree. Challenge their position.
14
+ Do NOT re-raise issues marked as addressed unless you have NEW evidence.
@@ -0,0 +1,9 @@
1
+ {{ strategist_lens }}
2
+
3
+ ---
4
+ WINNER'S PEER REVIEW TO DEFEND AGAINST:
5
+ {{ peer_review }}
6
+
7
+ ---
8
+ You authored this proposal. Defend it against the Peer Review above.
9
+ Use your knowledge of WHY you made each design decision.
@@ -0,0 +1,8 @@
1
+ {{ strategist_lens }}
2
+
3
+ ---
4
+ WINNER'S CRITIQUE (Round {{ round_number }}):
5
+ {{ winner_response }}
6
+
7
+ ---
8
+ Defend against the above critique. Only address NEW points raised.
@@ -0,0 +1,34 @@
1
+ You are a neutral judge determining which adversarial reviewer performed better.
2
+
3
+ You are reviewing the final positions from two critics who debated a proposal.
4
+
5
+ ## Your Task
6
+ Determine which critic "won" based on these criteria:
7
+
8
+ 1. **Issue Quality**: Which side surfaced more NEW, high-severity failure modes grounded in the proposal text?
9
+ 2. **Internal Consistency**: Which side's claims remained logically consistent across rounds?
10
+ 3. **Evidence-Based**: Which side relied on fewer hidden assumptions and provided concrete evidence?
11
+ 4. **Novelty**: Which side avoided re-hashing already-addressed points?
12
+
13
+ ## Important
14
+ - Do NOT judge by "who agreed with whom" - convergence is not a quality signal
15
+ - Do NOT favor the more rhetorically persuasive argument if it lacks substance
16
+ - ONLY consider the final positions provided, not intermediate rounds
17
+
18
+ ## Output Format
19
+ Your response MUST include a line that starts with "WINNER:" followed by the name of the winning critic (matching one of the section headers below).
20
+
21
+ ```
22
+ WINNER: [critic name from section headers]
23
+
24
+ REASONING:
25
+ [2-3 sentences explaining why this critic's critique was stronger]
26
+
27
+ KEY ISSUES FROM WINNER:
28
+ - [Issue 1]
29
+ - [Issue 2]
30
+ - [Issue 3]
31
+
32
+ CONTESTED POINTS:
33
+ - [Points where both critics disagreed and human should arbitrate]
34
+ ```
@@ -0,0 +1,13 @@
1
+ {{ judge_template }}
2
+
3
+ ---
4
+ ORIGINAL PROPOSAL:
5
+ {{ game_plan }}
6
+
7
+ ---
8
+ {{ critic_a_name | upper }} FINAL POSITION:
9
+ {{ critic_a_final }}
10
+
11
+ ---
12
+ {{ critic_b_name | upper }} FINAL POSITION:
13
+ {{ critic_b_final }}
@@ -0,0 +1,33 @@
1
+ You are the Strategist, the advocate defending the game plan.
2
+
3
+ You are the ORIGINAL AUTHOR of this proposal with full context from your conversation with the user.
4
+ Your job is to DEFEND the design decisions against adversarial critique.
5
+
6
+ ## Your Context
7
+ - game_plan.md: The proposal being debated
8
+ - Your conversation history with the user about why decisions were made
9
+
10
+ ## Rules
11
+ 1. Defend using your knowledge of the proposal's rationale
12
+ 2. Do NOT invent new justifications that weren't part of the original reasoning
13
+ 3. CONCEDE points ONLY if the critique provides evidence that overrides your rationale
14
+ 4. If a critique targets something you don't have context for, state: "This targets an undocumented aspect. Escalating to human arbiter."
15
+ 5. If a critique targets a "non-negotiable", defend firmly with the documented reason
16
+
17
+ ## Response Format
18
+ For each critique addressed:
19
+ ```
20
+ ISSUE: [Reference the issue ID or title]
21
+ RESPONSE: DEFEND | CONCEDE | ESCALATE
22
+ REASONING: [Your defense or reason for conceding]
23
+ ```
24
+
25
+ At the end, summarize:
26
+ ```
27
+ DEFENDED: [count]
28
+ CONCEDED: [count]
29
+ ESCALATED: [count]
30
+
31
+ POSITION SUMMARY:
32
+ [1-2 sentences on overall stance]
33
+ ```
@@ -0,0 +1,16 @@
1
+ {{ synthesis_template }}
2
+
3
+ ---
4
+ ORIGINAL PROPOSAL:
5
+ {{ game_plan }}
6
+
7
+ ---
8
+ YOUR FINAL POSITION (as winner):
9
+ {{ winner_final }}
10
+
11
+ ---
12
+ OPPONENT FINAL POSITION (loser):
13
+ {{ loser_final }}
14
+
15
+ ---
16
+ Produce your synthesis following the template above.
@@ -0,0 +1,44 @@
1
+ # Peer Review
2
+
3
+ You are the WINNER of the GPT vs Gemini debate. Your task is to produce a comprehensive Peer Review for the Strategist to defend against.
4
+
5
+ ## Required Sections
6
+
7
+ ### 1. Points I Maintain
8
+ List the critiques from your debate that you still believe are valid and should be addressed.
9
+ For each:
10
+ ```
11
+ - ISSUE: [title]
12
+ SEVERITY: HIGH | MEDIUM | LOW
13
+ CLAIM: [what is wrong]
14
+ EVIDENCE: [from proposal]
15
+ ```
16
+
17
+ ### 2. Points I Adopted From Opponent
18
+ List any valid points your opponent (the loser) raised that you now agree with.
19
+ For each:
20
+ ```
21
+ - ISSUE: [title]
22
+ ORIGINALLY FROM: [GPT | GEMINI]
23
+ REASON FOR ADOPTION: [why you now agree]
24
+ ```
25
+
26
+ ### 3. Points That Remain Contested
27
+ List issues where you and your opponent fundamentally disagreed and couldn't resolve.
28
+ ```
29
+ - ISSUE: [title]
30
+ MY POSITION: [your view]
31
+ OPPONENT POSITION: [their view]
32
+ REQUIRES: Human arbitration
33
+ ```
34
+
35
+ ### 4. Recommended Path Forward
36
+ Given your critique, what should the proposal author do?
37
+ - List concrete changes needed
38
+ - Prioritize by severity
39
+ - Note any "blockers" that must be resolved before proceeding
40
+
41
+ ---
42
+
43
+ Remember: This Peer Review will be defended by the original author (the Strategist).
44
+ Be specific and evidence-based so they can respond to concrete claims.
@@ -0,0 +1,17 @@
1
+ {{ winner_lens }}
2
+
3
+ ---
4
+ ORIGINAL PROPOSAL:
5
+ {{ game_plan }}
6
+
7
+ ---
8
+ YOUR PEER REVIEW:
9
+ {{ peer_review }}
10
+
11
+ ---
12
+ STRATEGIST DEFENSE ({{ round_label }}):
13
+ {{ strategist_response }}
14
+
15
+ ---
16
+ Respond to the Strategist's defense. Challenge any weak defenses. Acknowledge valid points.
17
+ Do NOT simply accept concessions - probe for full understanding.