multi-model-debate 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. multi_model_debate/__init__.py +4 -0
  2. multi_model_debate/__main__.py +6 -0
  3. multi_model_debate/cli.py +290 -0
  4. multi_model_debate/config.py +271 -0
  5. multi_model_debate/exceptions.py +83 -0
  6. multi_model_debate/models/__init__.py +71 -0
  7. multi_model_debate/models/claude.py +168 -0
  8. multi_model_debate/models/cli_wrapper.py +233 -0
  9. multi_model_debate/models/gemini.py +66 -0
  10. multi_model_debate/models/openai.py +66 -0
  11. multi_model_debate/models/protocols.py +35 -0
  12. multi_model_debate/orchestrator.py +465 -0
  13. multi_model_debate/phases/__init__.py +22 -0
  14. multi_model_debate/phases/base.py +236 -0
  15. multi_model_debate/phases/baseline.py +117 -0
  16. multi_model_debate/phases/debate.py +154 -0
  17. multi_model_debate/phases/defense.py +186 -0
  18. multi_model_debate/phases/final_position.py +307 -0
  19. multi_model_debate/phases/judge.py +177 -0
  20. multi_model_debate/phases/synthesis.py +162 -0
  21. multi_model_debate/pre_debate.py +83 -0
  22. multi_model_debate/prompts/arbiter_prompt.md.j2 +24 -0
  23. multi_model_debate/prompts/arbiter_summary.md.j2 +102 -0
  24. multi_model_debate/prompts/baseline_critique.md.j2 +5 -0
  25. multi_model_debate/prompts/critic_1_lens.md.j2 +52 -0
  26. multi_model_debate/prompts/critic_2_lens.md.j2 +52 -0
  27. multi_model_debate/prompts/debate_round.md.j2 +14 -0
  28. multi_model_debate/prompts/defense_initial.md.j2 +9 -0
  29. multi_model_debate/prompts/defense_round.md.j2 +8 -0
  30. multi_model_debate/prompts/judge.md.j2 +34 -0
  31. multi_model_debate/prompts/judge_prompt.md.j2 +13 -0
  32. multi_model_debate/prompts/strategist_proxy_lens.md.j2 +33 -0
  33. multi_model_debate/prompts/synthesis_prompt.md.j2 +16 -0
  34. multi_model_debate/prompts/synthesis_template.md.j2 +44 -0
  35. multi_model_debate/prompts/winner_response.md.j2 +17 -0
  36. multi_model_debate/response_parser.py +268 -0
  37. multi_model_debate/roles.py +163 -0
  38. multi_model_debate/storage/__init__.py +17 -0
  39. multi_model_debate/storage/run.py +509 -0
  40. multi_model_debate-1.0.1.dist-info/METADATA +572 -0
  41. multi_model_debate-1.0.1.dist-info/RECORD +44 -0
  42. multi_model_debate-1.0.1.dist-info/WHEEL +4 -0
  43. multi_model_debate-1.0.1.dist-info/entry_points.txt +2 -0
  44. multi_model_debate-1.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,509 @@
1
+ """Run directory management and checkpointing.
2
+
3
+ This module handles:
4
+ - Run directory creation and management
5
+ - Checkpointing for resume capability
6
+ - Strategist response journaling
7
+ - Prompt template hash validation
8
+
9
+ See REQUIREMENTS_V2.md Section 5 for journaling and hash validation rationale.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import hashlib
15
+ import json
16
+ import os
17
+ from dataclasses import dataclass, field
18
+ from datetime import datetime
19
+ from pathlib import Path
20
+ from typing import TYPE_CHECKING, Any
21
+
22
+ if TYPE_CHECKING:
23
+ from multi_model_debate.config import Config
24
+
25
+
26
# Default prompts directory: the "prompts" folder two levels up from this
# file (i.e. a sibling of the directory that contains this module).
# Used by the hash helpers below when no prompts_dir is passed.
PROMPTS_DIR = Path(__file__).parent.parent / "prompts"

# Environment variables folded into the integrity hash.
# These affect model detection and role assignment, so a change in any of
# them between run start and resume changes the computed hash.
INTEGRITY_ENV_VARS = (
    "ADVERSARIAL_CRITIQUE_STRATEGIST",
    "ANTHROPIC_MODEL",
    "OPENAI_MODEL",
    "GEMINI_MODEL",
)

# Default config filename; compute_integrity_hash() looks for it in the
# current working directory when no config_file is passed.
DEFAULT_CONFIG_FILE = "multi_model_debate.toml"
40
+
41
+
42
@dataclass
class RunContext:
    """State and on-disk layout for one review run.

    Every artifact of a run (manifest, checkpoint list, status log, journal,
    hash files, pre-debate marker) lives directly under ``run_dir``. The
    properties map each artifact to its fixed filename; the methods read or
    append to those files.
    """

    run_dir: Path
    game_plan_path: Path
    config: Config
    manifest: dict[str, Any] = field(default_factory=dict)

    # ------------------------------------------------------------------
    # File locations inside the run directory
    # ------------------------------------------------------------------

    @property
    def checkpoint_file(self) -> Path:
        """Location of the checkpoint file (one completed phase per line)."""
        return self.run_dir / "checkpoint.txt"

    @property
    def status_file(self) -> Path:
        """Location of the append-only status log."""
        return self.run_dir / "status.txt"

    @property
    def manifest_file(self) -> Path:
        """Location of the JSON manifest."""
        return self.run_dir / "manifest.json"

    @property
    def error_log(self) -> Path:
        """Location of the CLI error log."""
        return self.run_dir / "cli_errors.log"

    @property
    def journal_path(self) -> Path:
        """Location of the Strategist response journal (JSONL)."""
        return self.run_dir / "strategist_journal.jsonl"

    @property
    def integrity_hash_file(self) -> Path:
        """Location of the integrity hash (prompts + config + env vars)."""
        return self.run_dir / "integrity_hash.txt"

    @property
    def prompt_hash_file(self) -> Path:
        """Location of the legacy prompt-only hash (backwards compatibility)."""
        return self.run_dir / "prompt_hash.txt"

    @property
    def pre_debate_file(self) -> Path:
        """Location of the marker written when the pre-debate step finishes."""
        return self.run_dir / "pre_debate_complete.txt"

    # ------------------------------------------------------------------
    # Pre-debate marker
    # ------------------------------------------------------------------

    def is_pre_debate_complete(self) -> bool:
        """Report whether the pre-debate marker file exists.

        Returns:
            True if pre-debate is marked complete.
        """
        return self.pre_debate_file.exists()

    def mark_pre_debate_complete(self) -> None:
        """Write the pre-debate marker file, stamped with the local time."""
        stamp = datetime.now().strftime("%a %b %d %H:%M:%S %Y")
        marker_text = f"PRE_DEBATE_COMPLETE at {stamp}\n"
        self.pre_debate_file.write_text(marker_text)

    # ------------------------------------------------------------------
    # Journaling
    # ------------------------------------------------------------------

    def journal_response(self, phase: str, round_num: int, response: str) -> None:
        """Append one Strategist response to the journal file.

        Each call adds a single JSON object on its own line (JSONL), holding
        the timestamp, phase, round, response length and the response text.
        See REQUIREMENTS_V2.md Section 5 for journaling rationale.

        Args:
            phase: Phase identifier (e.g., "PHASE_5", "PHASE_6").
            round_num: Round number within the phase (0 for initial).
            response: The Strategist's response text.
        """
        record = {
            "timestamp": datetime.now().isoformat(),
            "phase": phase,
            "round": round_num,
            "response_length": len(response),
            "response": response,
        }
        with self.journal_path.open("a") as journal:
            journal.write(f"{json.dumps(record)}\n")

    # ------------------------------------------------------------------
    # Checkpoints
    # ------------------------------------------------------------------

    def completed_phases(self) -> set[str]:
        """Read the checkpoint file and return the phases recorded in it.

        Returns:
            Set of phase identifiers that have been completed; empty when the
            checkpoint file is missing or contains only whitespace.
        """
        checkpoint = self.checkpoint_file
        if not checkpoint.exists():
            return set()
        recorded = checkpoint.read_text().strip()
        return set(recorded.split("\n")) if recorded else set()

    def mark_complete(self, phase_name: str) -> None:
        """Append a phase identifier to the checkpoint file.

        Args:
            phase_name: The phase identifier to mark complete.
        """
        with self.checkpoint_file.open("a") as checkpoint:
            checkpoint.write(f"{phase_name}\n")

    # ------------------------------------------------------------------
    # Status log
    # ------------------------------------------------------------------

    def log_status(self, message: str) -> None:
        """Append ``"<message> at <local time>"`` to the status file.

        Args:
            message: The status message to log.
        """
        stamp = datetime.now().strftime("%a %b %d %H:%M:%S %Y")
        with self.status_file.open("a") as status:
            status.write(f"{message} at {stamp}\n")

    def is_complete(self) -> bool:
        """Report whether the run has been logged as finished.

        Returns:
            True if the status file exists and contains the text COMPLETED.
        """
        status = self.status_file
        return status.exists() and "COMPLETED" in status.read_text()
169
+
170
+
171
def create_run_from_content(
    content: str,
    runs_dir: Path,
    config: Config,
    source_name: str = "stdin",
) -> RunContext:
    """Create a new run directory from game plan content.

    The run directory is named after the current local time
    (``YYYYMMDD_HHMMSS``) and is populated with a copy of the game plan,
    a JSON manifest, empty checkpoint/error/status files and the integrity
    hash used for resume validation. A ``STARTED`` entry is then appended
    to the status log.

    Args:
        content: The game plan content as a string.
        runs_dir: Directory to create runs in.
        config: Configuration settings.
        source_name: Name to record as the source (for manifest).

    Returns:
        RunContext for the new run. Its ``game_plan_path`` points at the
        copy stored inside the run directory.

    Raises:
        FileExistsError: If a run directory for the current second already
            exists (the directory is created without ``exist_ok``).
    """
    # Local import: only needed here, for a hostname lookup that also works
    # on platforms where os.uname() is unavailable.
    import platform

    # Create timestamped run directory
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    run_dir = runs_dir / timestamp
    run_dir.mkdir(parents=True, mode=0o700)

    # Store a copy of the game plan inside the run directory.
    game_plan_path = run_dir / "00_game_plan.md"
    game_plan_path.write_text(content)

    # Hash the bytes that actually landed on disk. verify_game_plan_integrity()
    # hashes the raw file bytes, so hashing the in-memory string separately
    # could disagree whenever the platform's text encoding or newline
    # translation differs from a plain UTF-8 encode of the content.
    game_plan_sha256 = hashlib.sha256(game_plan_path.read_bytes()).hexdigest()

    # Create manifest
    manifest = {
        "timestamp": datetime.now().isoformat(),
        "user": os.environ.get("USER", "unknown"),
        "hostname": platform.node(),
        "working_dir": str(Path.cwd()),
        # Hard-coded manifest schema/script version string.
        "script_version": "1.0.0",
        "game_plan": source_name,
        "game_plan_sha256": game_plan_sha256,
        "config": {
            "critic_rounds": config.debate.critic_rounds,
            "strategist_rounds": config.debate.strategist_rounds,
            "default_timeout": config.models.default_timeout,
            "max_attempts": config.retry.max_attempts,
        },
    }
    (run_dir / "manifest.json").write_text(json.dumps(manifest, indent=2))

    # Initialize files
    (run_dir / "checkpoint.txt").touch()
    (run_dir / "cli_errors.log").touch()
    (run_dir / "status.txt").touch()

    # Save integrity hash for resume validation
    save_integrity_hash(run_dir)

    context = RunContext(
        run_dir=run_dir,
        game_plan_path=game_plan_path,
        config=config,
        manifest=manifest,
    )
    context.log_status("STARTED")

    return context
234
+
235
+
236
def create_run(
    game_plan: Path,
    runs_dir: Path,
    config: Config,
) -> RunContext:
    """Create a new run directory from a game plan file.

    Reads the file, delegates to create_run_from_content() with the file
    path recorded as the source name, then repoints the returned context at
    the original file.

    Args:
        game_plan: Path to the game plan file.
        runs_dir: Directory to create runs in.
        config: Configuration settings.

    Returns:
        RunContext for the new run, with ``game_plan_path`` set to
        ``game_plan`` rather than the copy inside the run directory.
    """
    plan_text = game_plan.read_text()
    run = create_run_from_content(
        plan_text,
        runs_dir,
        config,
        source_name=str(game_plan),
    )
    # Keep a handle on the caller's original file (for integrity checks).
    run.game_plan_path = game_plan
    return run
261
+
262
+
263
def find_latest_incomplete_run(runs_dir: Path) -> Path | None:
    """Find the most recent incomplete run directory.

    Candidate runs are sub-directories whose name starts with two digits
    (the timestamped naming scheme). They are examined newest-first by
    sorted path; a run counts as incomplete when it has a ``status.txt``
    that does not contain ``COMPLETED``. Directories without a status file
    are skipped.

    Args:
        runs_dir: Directory containing run subdirectories.

    Returns:
        Path to the run directory, or None if no incomplete runs.
    """
    if not runs_dir.exists():
        return None

    candidates = [
        entry
        for entry in runs_dir.iterdir()
        if entry.is_dir() and entry.name[:2].isdigit()
    ]
    candidates.sort(reverse=True)

    for candidate in candidates:
        status_path = candidate / "status.txt"
        if not status_path.exists():
            continue
        if "COMPLETED" not in status_path.read_text():
            return candidate

    return None
289
+
290
+
291
def load_run(run_dir: Path, config: Config) -> RunContext:
    """Load an existing run context.

    Reads ``manifest.json`` from the run directory when present (otherwise
    the manifest is an empty dict), points the context at the game plan
    copy stored in the run directory, and appends a ``RESUMED`` entry to
    the status log.

    Args:
        run_dir: Path to the run directory.
        config: Configuration settings.

    Returns:
        RunContext for the existing run.
    """
    manifest_path = run_dir / "manifest.json"
    manifest = json.loads(manifest_path.read_text()) if manifest_path.exists() else {}

    resumed = RunContext(
        run_dir=run_dir,
        game_plan_path=run_dir / "00_game_plan.md",
        config=config,
        manifest=manifest,
    )
    resumed.log_status("RESUMED")
    return resumed
317
+
318
+
319
def verify_game_plan_integrity(context: RunContext) -> bool:
    """Verify the game plan hasn't changed since run started.

    Compares the SHA-256 of the ``00_game_plan.md`` copy inside the run
    directory with the ``game_plan_sha256`` value recorded in the manifest.

    Args:
        context: The run context to verify.

    Returns:
        True if the manifest records no hash (nothing to verify) or the
        stored copy matches the recorded hash; False if the copy is missing
        or its hash differs.
    """
    try:
        recorded = str(context.manifest["game_plan_sha256"])
    except KeyError:
        return True  # No hash to verify

    stored_copy = context.run_dir / "00_game_plan.md"
    if not stored_copy.exists():
        return False

    actual = hashlib.sha256(stored_copy.read_bytes()).hexdigest()
    return actual == recorded
339
+
340
+
341
+ # =============================================================================
342
+ # Integrity Hash Validation
343
+ # =============================================================================
344
+ # See REQUIREMENTS_V2.md Section 5 for rationale:
345
+ # A "Zombie Session" where history was generated with different prompts, config,
346
+ # or env vars produces unreliable outputs. We block resume if any of these changed.
347
+ # =============================================================================
348
+
349
+
350
def compute_integrity_hash(
    prompts_dir: Path | None = None,
    config_file: Path | None = None,
) -> str:
    """Compute SHA-256 hash of prompts, config, and relevant env vars.

    The digest is fed, in this order:
    1. For every ``*.j2`` file in the prompts directory (sorted):
       ``PROMPT:``, the file name, then the file bytes.
    2. If the config file exists: ``CONFIG:`` then the file bytes.
    3. For every name in INTEGRITY_ENV_VARS (sorted):
       ``ENV:<name>=<value>``, with an unset variable treated as empty.

    Args:
        prompts_dir: Directory containing .j2 template files.
            Defaults to package prompts directory.
        config_file: Path to config file. Defaults to multi_model_debate.toml
            in current directory.

    Returns:
        SHA-256 hex digest of all integrity-relevant state.
    """
    templates_root = PROMPTS_DIR if prompts_dir is None else prompts_dir
    config_path = Path.cwd() / DEFAULT_CONFIG_FILE if config_file is None else config_file

    digest = hashlib.sha256()

    # Prompt templates; the file name is part of the hash so renames are detected.
    for template in sorted(templates_root.glob("*.j2")):
        digest.update(b"PROMPT:" + template.name.encode() + template.read_bytes())

    # Config file contributes only when it is present.
    if config_path.exists():
        digest.update(b"CONFIG:" + config_path.read_bytes())

    # Environment variables, in sorted order for determinism.
    for name in sorted(INTEGRITY_ENV_VARS):
        value = os.environ.get(name, "")
        digest.update(f"ENV:{name}={value}".encode())

    return digest.hexdigest()
399
+
400
+
401
def save_integrity_hash(
    run_dir: Path,
    prompts_dir: Path | None = None,
    config_file: Path | None = None,
) -> str:
    """Compute the integrity hash and store it in the run directory.

    The hash is written to ``integrity_hash.txt`` so a later resume can
    compare it against the then-current prompts, config, and env vars.

    Args:
        run_dir: Run directory to save hash in.
        prompts_dir: Directory containing .j2 template files.
        config_file: Path to config file.

    Returns:
        The computed hash.
    """
    digest = compute_integrity_hash(prompts_dir, config_file)
    target = run_dir / "integrity_hash.txt"
    target.write_text(digest)
    return digest
421
+
422
+
423
def validate_integrity_hash(
    run_dir: Path,
    prompts_dir: Path | None = None,
    config_file: Path | None = None,
) -> bool:
    """Check if prompts, config, or env vars have changed since debate started.

    Lookup order:
    1. ``integrity_hash.txt`` - compared against compute_integrity_hash().
    2. ``prompt_hash.txt`` (legacy) - compared against compute_prompt_hash(),
       since legacy runs only recorded the prompt templates.
    3. Neither file present - treated as safe.

    Args:
        run_dir: Run directory containing stored hash.
        prompts_dir: Directory containing .j2 template files.
        config_file: Path to config file.

    Returns:
        True if everything matches (safe to resume), False if changed.
    """
    current_format = run_dir / "integrity_hash.txt"
    legacy_format = run_dir / "prompt_hash.txt"

    if current_format.exists():
        recorded = current_format.read_text().strip()
        return recorded == compute_integrity_hash(prompts_dir, config_file)

    if legacy_format.exists():
        recorded = legacy_format.read_text().strip()
        return recorded == compute_prompt_hash(prompts_dir)

    # No hash stored - assume safe (very old runs)
    return True
457
+
458
+
459
+ # Legacy function for backwards compatibility with existing tests
460
def compute_prompt_hash(prompts_dir: Path | None = None) -> str:
    """Compute SHA-256 hash of prompt templates only.

    Legacy helper kept for backwards compatibility; new code should use
    compute_integrity_hash(). For each ``*.j2`` file (sorted), the file
    name and then the file bytes are fed to the digest.

    Args:
        prompts_dir: Directory containing .j2 template files. Defaults to
            the package prompts directory.

    Returns:
        SHA-256 hex digest of prompt templates.
    """
    templates_root = PROMPTS_DIR if prompts_dir is None else prompts_dir

    digest = hashlib.sha256()
    for template in sorted(templates_root.glob("*.j2")):
        digest.update(template.name.encode() + template.read_bytes())

    return digest.hexdigest()
483
+
484
+
485
+ # Legacy functions for backwards compatibility
486
def save_prompt_hash(run_dir: Path, prompts_dir: Path | None = None) -> str:
    """Legacy function - use save_integrity_hash() instead.

    Forwards to save_integrity_hash() with the default config file.

    Args:
        run_dir: Run directory to save hash in.
        prompts_dir: Directory containing .j2 template files.

    Returns:
        The computed hash.
    """
    return save_integrity_hash(run_dir, prompts_dir=prompts_dir)
489
+
490
+
491
def validate_prompt_hash(run_dir: Path, prompts_dir: Path | None = None) -> bool:
    """Legacy function - use validate_integrity_hash() instead.

    Forwards to validate_integrity_hash() with the default config file.

    Args:
        run_dir: Run directory containing stored hash.
        prompts_dir: Directory containing .j2 template files.

    Returns:
        True if the stored hash matches, False if it differs.
    """
    return validate_integrity_hash(run_dir, prompts_dir=prompts_dir)
494
+
495
+
496
class PromptHashMismatchError(Exception):
    """Raised when integrity validation fails.

    This is a blocking error - the debate must be restarted.
    See REQUIREMENTS_V2.md Section 5 for rationale.

    The offending run directory is kept on the ``run_dir`` attribute; the
    exception message itself is fixed and does not include the path.
    """

    def __init__(self, run_dir: Path) -> None:
        message = (
            "Integrity check failed: prompts, config, or environment have changed.\n"
            "A debate with changed state produces unreliable results.\n"
            "Must restart debate from beginning."
        )
        super().__init__(message)
        self.run_dir = run_dir