multi-model-debate 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- multi_model_debate/__init__.py +4 -0
- multi_model_debate/__main__.py +6 -0
- multi_model_debate/cli.py +290 -0
- multi_model_debate/config.py +271 -0
- multi_model_debate/exceptions.py +83 -0
- multi_model_debate/models/__init__.py +71 -0
- multi_model_debate/models/claude.py +168 -0
- multi_model_debate/models/cli_wrapper.py +233 -0
- multi_model_debate/models/gemini.py +66 -0
- multi_model_debate/models/openai.py +66 -0
- multi_model_debate/models/protocols.py +35 -0
- multi_model_debate/orchestrator.py +465 -0
- multi_model_debate/phases/__init__.py +22 -0
- multi_model_debate/phases/base.py +236 -0
- multi_model_debate/phases/baseline.py +117 -0
- multi_model_debate/phases/debate.py +154 -0
- multi_model_debate/phases/defense.py +186 -0
- multi_model_debate/phases/final_position.py +307 -0
- multi_model_debate/phases/judge.py +177 -0
- multi_model_debate/phases/synthesis.py +162 -0
- multi_model_debate/pre_debate.py +83 -0
- multi_model_debate/prompts/arbiter_prompt.md.j2 +24 -0
- multi_model_debate/prompts/arbiter_summary.md.j2 +102 -0
- multi_model_debate/prompts/baseline_critique.md.j2 +5 -0
- multi_model_debate/prompts/critic_1_lens.md.j2 +52 -0
- multi_model_debate/prompts/critic_2_lens.md.j2 +52 -0
- multi_model_debate/prompts/debate_round.md.j2 +14 -0
- multi_model_debate/prompts/defense_initial.md.j2 +9 -0
- multi_model_debate/prompts/defense_round.md.j2 +8 -0
- multi_model_debate/prompts/judge.md.j2 +34 -0
- multi_model_debate/prompts/judge_prompt.md.j2 +13 -0
- multi_model_debate/prompts/strategist_proxy_lens.md.j2 +33 -0
- multi_model_debate/prompts/synthesis_prompt.md.j2 +16 -0
- multi_model_debate/prompts/synthesis_template.md.j2 +44 -0
- multi_model_debate/prompts/winner_response.md.j2 +17 -0
- multi_model_debate/response_parser.py +268 -0
- multi_model_debate/roles.py +163 -0
- multi_model_debate/storage/__init__.py +17 -0
- multi_model_debate/storage/run.py +509 -0
- multi_model_debate-1.0.1.dist-info/METADATA +572 -0
- multi_model_debate-1.0.1.dist-info/RECORD +44 -0
- multi_model_debate-1.0.1.dist-info/WHEEL +4 -0
- multi_model_debate-1.0.1.dist-info/entry_points.txt +2 -0
- multi_model_debate-1.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,572 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: multi-model-debate
|
|
3
|
+
Version: 1.0.1
|
|
4
|
+
Summary: Let AI models argue so you don't have to
|
|
5
|
+
Author: Mark Heck
|
|
6
|
+
License: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: ai,code-review,debate,llm,multi-model
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Environment :: Console
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Requires-Dist: jinja2>=3.1
|
|
18
|
+
Requires-Dist: pydantic-settings>=2.0
|
|
19
|
+
Requires-Dist: pydantic>=2.0
|
|
20
|
+
Requires-Dist: rich>=13.0
|
|
21
|
+
Requires-Dist: typer>=0.9.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: mypy>=1.0; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest-mock>=3.0; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: ruff>=0.1; extra == 'dev'
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# Multi-Model Debate
|
|
31
|
+
|
|
32
|
+
**Get your ideas stress-tested by AI before you build them.**
|
|
33
|
+
|
|
34
|
+
You know that feeling when you're about to start a project and you *wish* you could get a few smart people to poke holes in your plan first? This tool does exactly that, except the "smart people" are different AI models debating each other about your idea.
|
|
35
|
+
|
|
36
|
+

|
|
37
|
+
|
|
38
|
+
## What It Does
|
|
39
|
+
|
|
40
|
+
You describe what you want to build. Two AI models then:
|
|
41
|
+
|
|
42
|
+
1. **Critique your plan** independently (finding different problems)
|
|
43
|
+
2. **Debate each other** about which issues matter most
|
|
44
|
+
3. **A judge picks a winner** based on argument quality
|
|
45
|
+
4. **The winning critic's points get consolidated**
|
|
46
|
+
5. **Your original AI defends your plan** against the winner
|
|
47
|
+
6. **You get a final report** with clear recommendations
|
|
48
|
+
|
|
49
|
+
The whole process takes about 10-20 minutes, *depending on complexity*, and runs automatically.
|
|
50
|
+
|
|
51
|
+
## Why Use This?
|
|
52
|
+
|
|
53
|
+
| Without This Tool | With This Tool |
|
|
54
|
+
|-------------------|----------------|
|
|
55
|
+
| You ask one AI for feedback | Three AIs argue about your plan |
|
|
56
|
+
| AI tends to agree with you | AIs are prompted to find problems |
|
|
57
|
+
| Criticism may be shallow | Multi-round debate deepens analysis |
|
|
58
|
+
| You might miss blind spots | Different AI "personalities" catch different issues |
|
|
59
|
+
| No structure to the feedback | Organized report with priorities |
|
|
60
|
+
|
|
61
|
+
**Best for:**
|
|
62
|
+
- Architecture decisions
|
|
63
|
+
- Feature designs
|
|
64
|
+
- Migration plans
|
|
65
|
+
- Any plan where being wrong is expensive
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Prerequisites
|
|
70
|
+
|
|
71
|
+
You need **at least 2 AI CLIs** installed before using this tool.
|
|
72
|
+
|
|
73
|
+
This tool works out of the box using the following three:
|
|
74
|
+
|
|
75
|
+
| AI | Command | How to Get It |
|
|
76
|
+
|----|---------|---------------|
|
|
77
|
+
| Claude Code | `claude` | [Install Claude Code](https://github.com/anthropics/claude-code) |
|
|
78
|
+
| Codex | `codex` | [Install OpenAI Codex CLI](https://github.com/openai/codex) |
|
|
79
|
+
| Gemini CLI | `gemini` | [Install Google Gemini CLI](https://github.com/google-gemini/gemini-cli) |
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## Quick Setup: Let Claude Do It For You
|
|
84
|
+
|
|
85
|
+
Already using Claude Code? Just paste this into your conversation:
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
I want to install the Multi-Model Debate tool. Please:
|
|
89
|
+
|
|
90
|
+
1. Check if pipx is installed. If not, install it
|
|
91
|
+
2. Run: pipx install multi-model-debate
|
|
92
|
+
3. Verify it works: multi-model-debate --help
|
|
93
|
+
4. APPEND these instructions to my ~/.claude/CLAUDE.md file (create the file if it doesn't exist, but DO NOT overwrite any existing content):
|
|
94
|
+
|
|
95
|
+
## Multi-Model Debate Tool
|
|
96
|
+
|
|
97
|
+
When I say "run the debate tool", "start the debate", "do a peer review", or "review this":
|
|
98
|
+
1. Save my plan to a markdown file in the current directory
|
|
99
|
+
2. Run: multi-model-debate start <filename.md>
|
|
100
|
+
3. Wait for it to complete (about 10-20 minutes)
|
|
101
|
+
4. Show me the Final Position from the runs folder
|
|
102
|
+
|
|
103
|
+
When I say "resume the debate" or "continue the review":
|
|
104
|
+
1. Run: multi-model-debate resume
|
|
105
|
+
|
|
106
|
+
When I say "check debate status":
|
|
107
|
+
1. Run: multi-model-debate status
|
|
108
|
+
|
|
109
|
+
Confirm everything is set up
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
That's it! Claude will handle the rest. Once done, you can say "run the debate tool" anytime during your Claude Code session.
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Manual Setup
|
|
117
|
+
|
|
118
|
+
*Skip this if you used the Quick Setup above.*
|
|
119
|
+
|
|
120
|
+
### Step 1: Install the Tool
|
|
121
|
+
|
|
122
|
+
Open your terminal (Terminal app on Mac, or Command Prompt/PowerShell on Windows) and run this command:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
pipx install multi-model-debate
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
This downloads and installs the tool from [PyPI](https://pypi.org/project/multi-model-debate/).
|
|
129
|
+
|
|
130
|
+
> **Don't have pipx?** Install it first:
|
|
131
|
+
> - **Mac:** `brew install pipx && pipx ensurepath`
|
|
132
|
+
> - **Linux:** `sudo apt install pipx && pipx ensurepath`
|
|
133
|
+
> - **Windows:** `scoop install pipx` or `pip install --user pipx`
|
|
134
|
+
>
|
|
135
|
+
> Then restart your terminal and run the install command above.
|
|
136
|
+
|
|
137
|
+
To verify it worked, run:
|
|
138
|
+
```bash
|
|
139
|
+
multi-model-debate --help
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
You should see a list of commands.
|
|
143
|
+
|
|
144
|
+
### Step 2: Teach your model the Commands (Example using Claude Code)
|
|
145
|
+
|
|
146
|
+
If you want to use this tool from inside Claude Code by saying things like "run the debate tool", you need to add instructions to a special file called **CLAUDE.md**.
|
|
147
|
+
|
|
148
|
+
**Where to put it:**
|
|
149
|
+
- `~/.claude/CLAUDE.md` applies to ALL your projects (recommended)
|
|
150
|
+
- Or `CLAUDE.md` in a specific project folder — this applies only to that project
|
|
151
|
+
|
|
152
|
+
**What to add:**
|
|
153
|
+
|
|
154
|
+
Open (or create) the file and **add this at the bottom** (don't replace existing content):
|
|
155
|
+
|
|
156
|
+
```markdown
|
|
157
|
+
## Multi-Model Debate Tool
|
|
158
|
+
|
|
159
|
+
When I say "run the debate tool", "start the debate", "do a peer review", or "review this":
|
|
160
|
+
1. Save my plan to a markdown file in the current directory
|
|
161
|
+
2. Run: multi-model-debate start <filename.md>
|
|
162
|
+
3. Wait for it to complete (about 10-20 minutes)
|
|
163
|
+
4. Show me the Final Position from the runs folder
|
|
164
|
+
|
|
165
|
+
When I say "resume the debate" or "continue the review":
|
|
166
|
+
1. Run: multi-model-debate resume
|
|
167
|
+
|
|
168
|
+
When I say "check debate status":
|
|
169
|
+
1. Run: multi-model-debate status
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
> **Where is ~/.claude/?**
|
|
173
|
+
> - **Mac/Linux:** It's a hidden folder in your home directory. In terminal: `open ~/.claude` (Mac) or `xdg-open ~/.claude` (Linux)
|
|
174
|
+
> - **Windows:** `C:\Users\YourName\.claude\`
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## How to Use It
|
|
179
|
+
|
|
180
|
+
### Option A: From Inside your AI CLI
|
|
181
|
+
*The recommended option — your AI will defend your plan with full context.*
|
|
182
|
+
|
|
183
|
+
Once you've completed setup, just talk naturally:
|
|
184
|
+
|
|
185
|
+
**Start a review:**
|
|
186
|
+
1. Describe your plan to AI like you normally would
|
|
187
|
+
2. Say **"run the debate tool"**
|
|
188
|
+
3. Wait about 10-20 minutes
|
|
189
|
+
4. Your AI CLI will show you the results
|
|
190
|
+
|
|
191
|
+
**Other commands you can say:**
|
|
192
|
+
|
|
193
|
+
| Say This | What Happens |
|
|
194
|
+
|----------|--------------|
|
|
195
|
+
| "run the debate tool" | Starts a new review of your plan |
|
|
196
|
+
| "resume the debate" | Continues if it got interrupted |
|
|
197
|
+
| "check debate status" | Shows progress |
|
|
198
|
+
| "show me the final position" | Displays the results again |
|
|
199
|
+
|
|
200
|
+
### Option B: Standalone
|
|
201
|
+
|
|
202
|
+
You can also run the tool directly from the terminal:
|
|
203
|
+
|
|
204
|
+
**From a file:**
|
|
205
|
+
```bash
|
|
206
|
+
multi-model-debate start [my-plan].md
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
**By typing your plan directly:**
|
|
210
|
+
```bash
|
|
211
|
+
multi-model-debate start --stdin
|
|
212
|
+
```
|
|
213
|
+
Then type or paste your plan, and press `Ctrl+D` (Mac/Linux) or `Ctrl+Z` then Enter (Windows) when done.
|
|
214
|
+
|
|
215
|
+
**Other commands:**
|
|
216
|
+
```bash
|
|
217
|
+
multi-model-debate status # Check progress
|
|
218
|
+
multi-model-debate resume # Continue interrupted debate
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## Where to Find the Results
|
|
224
|
+
|
|
225
|
+
### Debate Files Location
|
|
226
|
+
|
|
227
|
+
All debates are saved in a **`runs/`** folder in your current directory:
|
|
228
|
+
|
|
229
|
+
```
|
|
230
|
+
your-project/
|
|
231
|
+
└── runs/
|
|
232
|
+
└── 20260123_143052/ ← One folder per debate (date_time)
|
|
233
|
+
├── 00_game_plan.md ← Your original plan
|
|
234
|
+
├── p1_gemini_baseline.json
|
|
235
|
+
├── p1_codex_baseline.json
|
|
236
|
+
├── p2_r1_gemini.json ← Debate rounds
|
|
237
|
+
├── p2_r2_codex.json
|
|
238
|
+
├── ...
|
|
239
|
+
├── p3_winner_decision.md
|
|
240
|
+
├── p4_peer_review.md
|
|
241
|
+
├── p5_r1_strategist.md ← Defense rounds
|
|
242
|
+
├── ...
|
|
243
|
+
└── p6_final_position.md ← THE FINAL SUMMARY (start here!)
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
### The Summary File
|
|
247
|
+
|
|
248
|
+
The file you care about most is:
|
|
249
|
+
|
|
250
|
+
```
|
|
251
|
+
runs/<latest-folder>/p6_final_position.md
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
This is the **Final Position**: a structured summary of everything that happened in the debate, with clear recommendations for you.
|
|
255
|
+
|
|
256
|
+
**Quick way to find it:**
|
|
257
|
+
- From AI CLI: Say "show me the final position"
|
|
258
|
+
- From terminal: `ls -t runs/` shows newest folder first, then open `p6_final_position.md`
|
|
259
|
+
|
|
260
|
+
---
|
|
261
|
+
|
|
262
|
+
## What You Get Back
|
|
263
|
+
|
|
264
|
+
The **Final Position** (`p6_final_position.md`) contains:
|
|
265
|
+
|
|
266
|
+
| Section | What It Tells You |
|
|
267
|
+
|---------|-------------------|
|
|
268
|
+
| **Executive Summary** | Quick verdict: APPROVED, CONDITIONAL, or BLOCKED |
|
|
269
|
+
| **Issues by Category** | Technical facts vs. tradeoffs vs. constraints |
|
|
270
|
+
| **What Was Resolved** | Points defended or conceded during debate |
|
|
271
|
+
| **What Needs Your Decision** | Things only a human can decide |
|
|
272
|
+
| **Recommended Actions** | Prioritized fixes (BLOCKER → HIGH → MEDIUM) |
|
|
273
|
+
| **My Recommendation** | The AI's honest opinion on tradeoffs |
|
|
274
|
+
|
|
275
|
+
### Example Output
|
|
276
|
+
|
|
277
|
+
```markdown
|
|
278
|
+
## EXECUTIVE SUMMARY
|
|
279
|
+
CONDITIONAL APPROVAL — the core architecture is sound, but four
|
|
280
|
+
clarifications are required before implementation.
|
|
281
|
+
|
|
282
|
+
## WHAT NEEDS YOUR DECISION
|
|
283
|
+
| # | Decision | Options |
|
|
284
|
+
|---|----------|---------|
|
|
285
|
+
| 1 | Burst allowance | A) Strict (10), B) Moderate (25), C) Permissive (50) |
|
|
286
|
+
| 2 | Consistency model | A) Exact global (slower), B) Approximate (faster) |
|
|
287
|
+
|
|
288
|
+
## RECOMMENDED ACTIONS
|
|
289
|
+
| Priority | Action | Why |
|
|
290
|
+
|----------|--------|-----|
|
|
291
|
+
| BLOCKER | Define burst capacity | Without this, 100 requests can hit in 1ms |
|
|
292
|
+
| HIGH | Specify consistency strategy | Avoids surprise latency |
|
|
293
|
+
|
|
294
|
+
## MY RECOMMENDATION
|
|
295
|
+
Define the burst capacity first. Everything else is refinement.
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
---
|
|
299
|
+
|
|
300
|
+
## Troubleshooting
|
|
301
|
+
|
|
302
|
+
**"Command not found: multi-model-debate"**
|
|
303
|
+
- Run `pipx ensurepath` and restart your terminal
|
|
304
|
+
- Make sure the install command completed without errors
|
|
305
|
+
|
|
306
|
+
**"Command not found: pipx"**
|
|
307
|
+
- Install pipx first (see Step 1)
|
|
308
|
+
|
|
309
|
+
**"No models available" or the tool can't find AI CLIs**
|
|
310
|
+
- Make sure you have at least 2 AI CLIs installed (e.g., claude, codex, or gemini)
|
|
311
|
+
- Test them: `claude --version`, `codex --version`, `gemini --version`
|
|
312
|
+
|
|
313
|
+
**The debate seems stuck**
|
|
314
|
+
- Say "check debate status" (in AI CLI) or run `multi-model-debate status` (in terminal)
|
|
315
|
+
- Say "resume the debate" or run `multi-model-debate resume`
|
|
316
|
+
|
|
317
|
+
**Claude doesn't understand "run the debate tool"**
|
|
318
|
+
- Make sure the CLAUDE.md instructions were added (Quick Setup does this automatically)
|
|
319
|
+
- Check the file is in the right place (`~/.claude/CLAUDE.md`)
|
|
320
|
+
- Try restarting Claude Code
|
|
321
|
+
|
|
322
|
+
**I can't find the results**
|
|
323
|
+
- Look in the `runs/` folder in your current directory
|
|
324
|
+
- The summary is `runs/<folder>/p6_final_position.md`
|
|
325
|
+
- Run `ls runs/` to see all your debates
|
|
326
|
+
|
|
327
|
+
---
|
|
328
|
+
|
|
329
|
+
## Configuration (Optional)
|
|
330
|
+
|
|
331
|
+
The tool works out of the box with Claude, Codex, and Gemini. To customize which AI models are used, create a configuration file.
|
|
332
|
+
|
|
333
|
+
### Creating the Config File
|
|
334
|
+
|
|
335
|
+
1. Open your project folder (where you run the debate tool)
|
|
336
|
+
2. Create a new file called `multi_model_debate.toml`
|
|
337
|
+
3. Copy this starter template:
|
|
338
|
+
|
|
339
|
+
```toml
|
|
340
|
+
[roles]
|
|
341
|
+
strategist = "claude"
|
|
342
|
+
critics = ["gemini", "codex"]
|
|
343
|
+
judge = "claude"
|
|
344
|
+
|
|
345
|
+
[debate]
|
|
346
|
+
critic_rounds = 4 # How many rounds the critics debate each other
|
|
347
|
+
strategist_rounds = 4 # How many rounds your AI defends the plan
|
|
348
|
+
|
|
349
|
+
[notification]
|
|
350
|
+
enabled = true # Desktop notification when done
|
|
351
|
+
command = "notify-send" # Linux (use "osascript" wrapper for Mac)
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
### What Each Role Does
|
|
355
|
+
|
|
356
|
+
| Role | What It Does | Recommendation |
|
|
357
|
+
|------|--------------|----------------|
|
|
358
|
+
| **strategist** | Defends your plan | Use your primary AI |
|
|
359
|
+
| **critics** | Find problems with your plan | Use 2+ different AIs for diverse perspectives |
|
|
360
|
+
| **judge** | Picks which critic argued better | Same as strategist (different instance) |
|
|
361
|
+
|
|
362
|
+
> **Note:** The `critics` list must have at least 2 different AI models. This ensures diverse perspectives in the debate.
|
|
363
|
+
|
|
364
|
+
### Critic Perspectives (Lenses)
|
|
365
|
+
|
|
366
|
+
Each critic approaches your plan with a different "lens" — *a set of concerns they focus on*.
|
|
367
|
+
|
|
368
|
+
**How it works:**
|
|
369
|
+
|
|
370
|
+
```toml
|
|
371
|
+
[roles]
|
|
372
|
+
critics = ["gemini", "ollama"]
|
|
373
|
+
# critic_1_lens↑ ↑critic_2_lens
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
| Position | Lens File | Default Focus |
|
|
377
|
+
|----------|-----------|---------------|
|
|
378
|
+
| First in list | `critic_1_lens.md.j2` | Architecture, logic, scalability, edge cases |
|
|
379
|
+
| Second in list | `critic_2_lens.md.j2` | Security, deployment, maintenance, dependencies |
|
|
380
|
+
|
|
381
|
+
**Choosing which AI gets which lens:**
|
|
382
|
+
|
|
383
|
+
Think about each AI's strengths. Put the AI that's better at:
|
|
384
|
+
- **Deep technical analysis** → first position (critic_1_lens)
|
|
385
|
+
- **Practical/real-world concerns** → second position (critic_2_lens)
|
|
386
|
+
|
|
387
|
+
**Tip:** If you're unsure, ask AI:
|
|
388
|
+
> "Which model is better at [specific strength]?"
|
|
389
|
+
|
|
390
|
+
### Customizing Lenses
|
|
391
|
+
|
|
392
|
+
The default lenses work well for software projects. For specialized domains, you can customize what each critic focuses on.
|
|
393
|
+
|
|
394
|
+
**Lens files are located at:**
|
|
395
|
+
```
|
|
396
|
+
src/multi_model_debate/prompts/
|
|
397
|
+
├── critic_1_lens.md.j2 # First critic's perspective
|
|
398
|
+
└── critic_2_lens.md.j2 # Second critic's perspective
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
**Examples by domain:**
|
|
402
|
+
|
|
403
|
+
| Domain | critic_1_lens could focus on | critic_2_lens could focus on |
|
|
404
|
+
|--------|------------------------------|------------------------------|
|
|
405
|
+
| Academia | Methodology rigor, statistical validity | Citation gaps, reproducibility, ethics |
|
|
406
|
+
| Agriculture | Soil/climate assumptions, yield models | Regulatory compliance, supply chain |
|
|
407
|
+
| Healthcare | Clinical accuracy, safety protocols | HIPAA compliance, patient outcomes |
|
|
408
|
+
|
|
409
|
+
**Tip:** Ask AI to help customize:
|
|
410
|
+
> "Help me modify the debate tool's critic lenses for [your domain]"
|
|
411
|
+
|
|
412
|
+
---
|
|
413
|
+
|
|
414
|
+
## Using Other AI Models
|
|
415
|
+
|
|
416
|
+
The tool includes defaults for Claude, Codex, and Gemini. Want to use a different AI? Follow these steps.
|
|
417
|
+
|
|
418
|
+
### Step 1: Make Sure Your AI Has a Command-Line Tool
|
|
419
|
+
|
|
420
|
+
The debate tool works by running commands in your terminal. Your AI needs a CLI (command-line interface) tool.
|
|
421
|
+
|
|
422
|
+
**Examples of AI CLIs:**
|
|
423
|
+
- **Ollama**: `ollama run llama3 "your prompt"`
|
|
424
|
+
- **[llm](https://llm.datasette.io/)**: `llm "your prompt"`
|
|
425
|
+
|
|
426
|
+
**Test it first:** Open your terminal and try running your AI with a simple prompt. If it responds, you're good!
|
|
427
|
+
|
|
428
|
+
### Step 2: Find (or Create) Your Config File
|
|
429
|
+
|
|
430
|
+
Look for `multi_model_debate.toml` in your project folder.
|
|
431
|
+
|
|
432
|
+
**Don't have one?** Create it:
|
|
433
|
+
1. Open your project folder
|
|
434
|
+
2. Create a new text file
|
|
435
|
+
3. Name it exactly: `multi_model_debate.toml`
|
|
436
|
+
|
|
437
|
+
### Step 3: Add Your AI's Settings
|
|
438
|
+
|
|
439
|
+
Open `multi_model_debate.toml` and add a section for your AI. Copy this template and fill in the blanks:
|
|
440
|
+
|
|
441
|
+
```toml
|
|
442
|
+
[cli.YOUR_AI_NAME]
|
|
443
|
+
command = "your-cli-command"
|
|
444
|
+
input_mode = "positional"
|
|
445
|
+
```
|
|
446
|
+
|
|
447
|
+
**Example for Ollama:**
|
|
448
|
+
|
|
449
|
+
```toml
|
|
450
|
+
[cli.ollama]
|
|
451
|
+
command = "ollama"
|
|
452
|
+
subcommand = "run"
|
|
453
|
+
input_mode = "positional"
|
|
454
|
+
flags = ["llama3"]
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
**What each setting means:**
|
|
458
|
+
|
|
459
|
+
| Setting | What to Put | Example |
|
|
460
|
+
|---------|-------------|---------|
|
|
461
|
+
| `command` | The command you type in terminal | `"ollama"` |
|
|
462
|
+
| `subcommand` | Extra word after command (if needed) | `"run"` |
|
|
463
|
+
| `input_mode` | How the prompt is sent | `"positional"` (usually this) |
|
|
464
|
+
| `flags` | Extra options (like model name) | `["llama3"]` |
|
|
465
|
+
| `timeout` | Max seconds to wait (optional) | `600` |
|
|
466
|
+
|
|
467
|
+
**Complete example with Ollama as a critic:**
|
|
468
|
+
|
|
469
|
+
```toml
|
|
470
|
+
[roles]
|
|
471
|
+
strategist = "claude"
|
|
472
|
+
critics = ["ollama", "gemini"]
|
|
473
|
+
judge = "claude"
|
|
474
|
+
|
|
475
|
+
[cli.ollama]
|
|
476
|
+
command = "ollama"
|
|
477
|
+
subcommand = "run"
|
|
478
|
+
input_mode = "positional"
|
|
479
|
+
flags = ["llama3"]
|
|
480
|
+
timeout = 600
|
|
481
|
+
```
|
|
482
|
+
|
|
483
|
+
### Step 4: Test It
|
|
484
|
+
|
|
485
|
+
Run a debate and check that your AI responds. If you see errors, double-check:
|
|
486
|
+
- Is the CLI installed? (Try running it in terminal)
|
|
487
|
+
- Is the spelling exactly right in the config?
|
|
488
|
+
- Did you save the file?
|
|
489
|
+
|
|
490
|
+
### Need Help?
|
|
491
|
+
|
|
492
|
+
Just ask AI:
|
|
493
|
+
|
|
494
|
+
> "Help me configure the debate tool to use [your AI name]"
|
|
495
|
+
|
|
496
|
+
AI can help you figure out the right settings for its CLI.
|
|
497
|
+
|
|
498
|
+
---
|
|
499
|
+
|
|
500
|
+
## How This Was Built
|
|
501
|
+
|
|
502
|
+
I'm not a developer. This tool was built entirely with Claude Code Opus 4.5. I provided the vision and continuously questioned EVERYTHING. The code itself? All AI-generated.
|
|
503
|
+
|
|
504
|
+
If you're a developer reviewing this, I can't explain the architectural decisions or maintain this at a technical level. I only aggressively push AI for *well-architected* and *best-in-class* decisions and then have separate AI models critique it.
|
|
505
|
+
|
|
506
|
+
If you're a non-developer curious how AI can enable you, I hope this helps.
|
|
507
|
+
|
|
508
|
+
---
|
|
509
|
+
|
|
510
|
+
# Technical Reference
|
|
511
|
+
|
|
512
|
+
*Everything below is for developers.*
|
|
513
|
+
|
|
514
|
+
## How the Debate Works
|
|
515
|
+
|
|
516
|
+
```
|
|
517
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
518
|
+
│ Phase 1: Baseline Critiques │
|
|
519
|
+
│ Critic A ──────► independent critique │
|
|
520
|
+
│ Critic B ──────► independent critique │
|
|
521
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
522
|
+
│ Phase 2: Adversarial Debate (4 rounds) │
|
|
523
|
+
│ Critic A ◄────► Critic B │
|
|
524
|
+
│ (They argue about which issues matter most) │
|
|
525
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
526
|
+
│ Phase 3: Winner Determination │
|
|
527
|
+
│ Judge picks which critic made better arguments │
|
|
528
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
529
|
+
│ Phase 4: Peer Review │
|
|
530
|
+
│ Winner consolidates all critiques │
|
|
531
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
532
|
+
│ Phase 5: Strategist Defense (4 rounds) │
|
|
533
|
+
│ Your original AI defends your plan │
|
|
534
|
+
├─────────────────────────────────────────────────────────────────┤
|
|
535
|
+
│ Phase 6: Final Position │
|
|
536
|
+
│ Summary report with recommendations │
|
|
537
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
538
|
+
```
|
|
539
|
+
|
|
540
|
+
## CLI Reference
|
|
541
|
+
|
|
542
|
+
```bash
|
|
543
|
+
multi-model-debate start [OPTIONS] [FILE]
|
|
544
|
+
--stdin, - Read proposal from stdin
|
|
545
|
+
--skip-protocol Skip pre-debate date injection
|
|
546
|
+
--config, -c PATH Custom config file
|
|
547
|
+
--runs-dir, -r PATH Custom output directory
|
|
548
|
+
--verbose, -v Show detailed logs
|
|
549
|
+
|
|
550
|
+
multi-model-debate resume [OPTIONS]
|
|
551
|
+
--run PATH Resume specific run (default: latest)
|
|
552
|
+
|
|
553
|
+
multi-model-debate status
|
|
554
|
+
```
|
|
555
|
+
|
|
556
|
+
## Development
|
|
557
|
+
|
|
558
|
+
```bash
|
|
559
|
+
git clone https://github.com/markheck-solutions/multi-model-debate.git
|
|
560
|
+
cd multi-model-debate
|
|
561
|
+
python3 -m venv .venv
|
|
562
|
+
source .venv/bin/activate
|
|
563
|
+
pip install -e ".[dev]"
|
|
564
|
+
|
|
565
|
+
pytest tests/ -v
|
|
566
|
+
ruff check src/ tests/
|
|
567
|
+
mypy src/
|
|
568
|
+
```
|
|
569
|
+
|
|
570
|
+
## License
|
|
571
|
+
|
|
572
|
+
MIT
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
multi_model_debate/__init__.py,sha256=6tLyhaczmNLf2Sr_nkwOb-R6zLjp_kwUZ9vShicGlrA,121
|
|
2
|
+
multi_model_debate/__main__.py,sha256=yOW6Bx-p_38yhOUJ9vD1JGmBW3du-GVzDykPbrvUFy0,136
|
|
3
|
+
multi_model_debate/cli.py,sha256=ViAQhjzPqAQT-lvf44r5cL2zJ2UB336dYFynKwqaDdg,8732
|
|
4
|
+
multi_model_debate/config.py,sha256=qma_J9yMbNdzJi9HGsRs0iCKXBJSs1pFnZfghfLEY_U,9276
|
|
5
|
+
multi_model_debate/exceptions.py,sha256=Joxjnkj7SSSs4cFgMh-Edsj_pJ2TdEXG3d8CocU7If8,2308
|
|
6
|
+
multi_model_debate/orchestrator.py,sha256=bgn_Fpnix5xBwaV0iujjZF4FSZY5kOR2_zcYBT3eE68,16340
|
|
7
|
+
multi_model_debate/pre_debate.py,sha256=IAGZ_XacsgwlfxBFte2vyIIwJr_V-pBR3ZbTqRS3rbE,2261
|
|
8
|
+
multi_model_debate/response_parser.py,sha256=wyKZWIvkJW4W-xqsDxEBaM3GDxEXhBfWQK8zHo7rITQ,7931
|
|
9
|
+
multi_model_debate/roles.py,sha256=eGdaodoN2DHUw07bHm5nXR2Ug6StEweyhkdjCf-RSHs,5291
|
|
10
|
+
multi_model_debate/models/__init__.py,sha256=fqB_duRvi06dd1Rc8NJUp8YoExQChCv0hFNRvcxaCFI,2241
|
|
11
|
+
multi_model_debate/models/claude.py,sha256=QLxyYFaH6GUMV2rfsq8QuS_VL1GQAOVKoh0-f-GgQ4U,5471
|
|
12
|
+
multi_model_debate/models/cli_wrapper.py,sha256=AiEPa2Ko6I6jEEexsy07WHDfUQMrwlHBF4gn_lmAHkI,7597
|
|
13
|
+
multi_model_debate/models/gemini.py,sha256=gdgCxefqKvdrY4lvEtJsHwFXHG6QI2d4qOV5p8lSXJM,1959
|
|
14
|
+
multi_model_debate/models/openai.py,sha256=0z14v2T-dqq01slJnEUSi08GHrRLlalb4JwEuLKxzTs,1953
|
|
15
|
+
multi_model_debate/models/protocols.py,sha256=4hDRtzEcMmXJ9zRJKUChKrTT9RHEuoV0Pk0qEQnTXbk,1102
|
|
16
|
+
multi_model_debate/phases/__init__.py,sha256=T-Hv1kHpdTCC92oEBYt1RU5e1a9Fg0Mv4G4EP0HJspQ,714
|
|
17
|
+
multi_model_debate/phases/base.py,sha256=hWRuf9PPCJVcAwxw92o3GayuwgIxvUnI1v-1A4o_NRc,7404
|
|
18
|
+
multi_model_debate/phases/baseline.py,sha256=kGue8Z4RWXz4QxIQdc7diAIS8uIY6V0GGXevrnwQ_fE,4504
|
|
19
|
+
multi_model_debate/phases/debate.py,sha256=FW27rrCKRYRubv1AAskgU2o7M6Twg2JYXoEBoDs3lMM,6489
|
|
20
|
+
multi_model_debate/phases/defense.py,sha256=2y8hcBb46J6K9DvcVYBOTmduv8KwGMP-Rzi5eRBSIo8,7716
|
|
21
|
+
multi_model_debate/phases/final_position.py,sha256=qI3It1sxl4yv6qdG6kGtTauGNqN5D8K43cxjI2A2Dvo,10294
|
|
22
|
+
multi_model_debate/phases/judge.py,sha256=dx076E6XHSrqYmDFZ7WRyiHE0XcdEJmlNbx7WjHGkNU,6053
|
|
23
|
+
multi_model_debate/phases/synthesis.py,sha256=USSBa6o8RJg3YITdfR38NvLwPEnq_UcGQ82EIK6hl_A,5633
|
|
24
|
+
multi_model_debate/prompts/arbiter_prompt.md.j2,sha256=HGZs5FtIDp4yLJoXsFpBZY5iy9BXRz7I6j3rHYHsCOE,300
|
|
25
|
+
multi_model_debate/prompts/arbiter_summary.md.j2,sha256=c3dmec0wrRQ4smMdCsoEfu20kxwr3ApJhA-qBxNs0ZE,4082
|
|
26
|
+
multi_model_debate/prompts/baseline_critique.md.j2,sha256=ebNvc58wnpYOs9bKMTkjVFUJGZSStNRDxd59leCPb5I,66
|
|
27
|
+
multi_model_debate/prompts/critic_1_lens.md.j2,sha256=W9wE07cnhIX35xekON6yQ0En0lJqpvf4Pk3oj_gDw6o,1601
|
|
28
|
+
multi_model_debate/prompts/critic_2_lens.md.j2,sha256=MZrvp4NcRqskJEOWanh9GN0m6d684QypTg7I_Jg3Hoo,1612
|
|
29
|
+
multi_model_debate/prompts/debate_round.md.j2,sha256=AfbrHrv_bir-94P2geVlfTQfgTYviwA9u5A4eQA19VI,388
|
|
30
|
+
multi_model_debate/prompts/defense_initial.md.j2,sha256=IZVK09UUI0J7B8lKdkpPCdmzaSCPoMfzjdeIoPlw81g,225
|
|
31
|
+
multi_model_debate/prompts/defense_round.md.j2,sha256=vCaBzuroPvMzQ6VoWdCZaiupSuKCLQ3IuYWd5DF9BbY,175
|
|
32
|
+
multi_model_debate/prompts/judge.md.j2,sha256=IrhrF8-JL-drsnpSxgJT8WItBIrBV9xHPvW0WXCzL8c,1329
|
|
33
|
+
multi_model_debate/prompts/judge_prompt.md.j2,sha256=uh2ptsIPEy4hhBNkfHDPkEBoeVk9ZQWPIP-7E8Reb9c,214
|
|
34
|
+
multi_model_debate/prompts/strategist_proxy_lens.md.j2,sha256=uelu0mLIer_ScSwpuk7EtrVG14yezZpAyEOMZ69HxfU,1188
|
|
35
|
+
multi_model_debate/prompts/synthesis_prompt.md.j2,sha256=iQ6FhRM8CQhgAbVGnuN4z11dU_gN7XbRwFl2VktbDfI,252
|
|
36
|
+
multi_model_debate/prompts/synthesis_template.md.j2,sha256=p0q5D5nz3TWfhxKGZgXckir9vxNlHPx_1irfcdxvirA,1299
|
|
37
|
+
multi_model_debate/prompts/winner_response.md.j2,sha256=0CBLbIU-pgxO8sNSb_gLfsdj6X_WP2Zc8hipCygyr4c,349
|
|
38
|
+
multi_model_debate/storage/__init__.py,sha256=tkr9RBVNmDKBYHW2Cb4wU32lhd5ucFtpWZQZU-uuQNA,368
|
|
39
|
+
multi_model_debate/storage/run.py,sha256=u4nTLsb2QgcXswjcQ1L8FZlJAnwKIdKS5zxMET8FZAc,16300
|
|
40
|
+
multi_model_debate-1.0.1.dist-info/METADATA,sha256=sm62TBbV83TwLLgqCG2gE5CZmt6pBNn99hz50nciPuE,19599
|
|
41
|
+
multi_model_debate-1.0.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
42
|
+
multi_model_debate-1.0.1.dist-info/entry_points.txt,sha256=I65eUxdoCslrnbUO9ub5uAH4W81HyHiNkQGjt1PqgVU,66
|
|
43
|
+
multi_model_debate-1.0.1.dist-info/licenses/LICENSE,sha256=B3GFB_NNC5RgzLKmb3kyBSZC1nqLJmP0b1A0OUGN25g,1066
|
|
44
|
+
multi_model_debate-1.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mark Heck
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|