multi-model-debate 1.0.1__py3-none-any.whl
This diff shows the content of a publicly available package version as released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- multi_model_debate/__init__.py +4 -0
- multi_model_debate/__main__.py +6 -0
- multi_model_debate/cli.py +290 -0
- multi_model_debate/config.py +271 -0
- multi_model_debate/exceptions.py +83 -0
- multi_model_debate/models/__init__.py +71 -0
- multi_model_debate/models/claude.py +168 -0
- multi_model_debate/models/cli_wrapper.py +233 -0
- multi_model_debate/models/gemini.py +66 -0
- multi_model_debate/models/openai.py +66 -0
- multi_model_debate/models/protocols.py +35 -0
- multi_model_debate/orchestrator.py +465 -0
- multi_model_debate/phases/__init__.py +22 -0
- multi_model_debate/phases/base.py +236 -0
- multi_model_debate/phases/baseline.py +117 -0
- multi_model_debate/phases/debate.py +154 -0
- multi_model_debate/phases/defense.py +186 -0
- multi_model_debate/phases/final_position.py +307 -0
- multi_model_debate/phases/judge.py +177 -0
- multi_model_debate/phases/synthesis.py +162 -0
- multi_model_debate/pre_debate.py +83 -0
- multi_model_debate/prompts/arbiter_prompt.md.j2 +24 -0
- multi_model_debate/prompts/arbiter_summary.md.j2 +102 -0
- multi_model_debate/prompts/baseline_critique.md.j2 +5 -0
- multi_model_debate/prompts/critic_1_lens.md.j2 +52 -0
- multi_model_debate/prompts/critic_2_lens.md.j2 +52 -0
- multi_model_debate/prompts/debate_round.md.j2 +14 -0
- multi_model_debate/prompts/defense_initial.md.j2 +9 -0
- multi_model_debate/prompts/defense_round.md.j2 +8 -0
- multi_model_debate/prompts/judge.md.j2 +34 -0
- multi_model_debate/prompts/judge_prompt.md.j2 +13 -0
- multi_model_debate/prompts/strategist_proxy_lens.md.j2 +33 -0
- multi_model_debate/prompts/synthesis_prompt.md.j2 +16 -0
- multi_model_debate/prompts/synthesis_template.md.j2 +44 -0
- multi_model_debate/prompts/winner_response.md.j2 +17 -0
- multi_model_debate/response_parser.py +268 -0
- multi_model_debate/roles.py +163 -0
- multi_model_debate/storage/__init__.py +17 -0
- multi_model_debate/storage/run.py +509 -0
- multi_model_debate-1.0.1.dist-info/METADATA +572 -0
- multi_model_debate-1.0.1.dist-info/RECORD +44 -0
- multi_model_debate-1.0.1.dist-info/WHEEL +4 -0
- multi_model_debate-1.0.1.dist-info/entry_points.txt +2 -0
- multi_model_debate-1.0.1.dist-info/licenses/LICENSE +21 -0
multi_model_debate/models/__init__.py
@@ -0,0 +1,71 @@
+"""Model backends for CLI-based AI model invocations."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from multi_model_debate.models.claude import (
+    ClaudeBackend,
+    StrategistBackend,
+    create_claude_backend,
+    create_claude_interactive_backend,  # Deprecated, kept for backwards compat
+    create_strategist_backend,
+)
+from multi_model_debate.models.cli_wrapper import CLIModelBackend
+from multi_model_debate.models.gemini import GeminiBackend, create_gemini_backend
+from multi_model_debate.models.openai import CodexBackend, create_codex_backend
+from multi_model_debate.models.protocols import ModelBackend
+
+if TYPE_CHECKING:
+    from multi_model_debate.config import CLICommandConfig, RetrySettings
+
+
+def create_cli_backend(
+    name: str,
+    cli_config: CLICommandConfig,
+    retry_config: RetrySettings,
+    min_response_length: int = 100,
+    default_timeout: int = 300,
+) -> CLIModelBackend:
+    """Create a CLI model backend for any model.
+
+    This is a generic factory that works with any CLI-based model.
+    Use this for dynamic model loading from config.
+
+    Args:
+        name: Human-readable model name (e.g., 'GPT', 'Gemini', 'Claude').
+        cli_config: CLI command configuration from config.
+        retry_config: Retry settings for exponential backoff.
+        min_response_length: Minimum chars for valid response.
+        default_timeout: Global default timeout (per-model config takes priority).
+
+    Returns:
+        A CLIModelBackend instance configured for the model.
+    """
+    return CLIModelBackend(
+        name=name.upper(),
+        cli_config=cli_config,
+        retry_config=retry_config,
+        min_response_length=min_response_length,
+        default_timeout=default_timeout,
+    )
+
+
+__all__ = [
+    # Protocols
+    "ModelBackend",
+    # Base
+    "CLIModelBackend",
+    # Implementations
+    "CodexBackend",
+    "GeminiBackend",
+    "ClaudeBackend",
+    "StrategistBackend",
+    # Factories
+    "create_cli_backend",
+    "create_codex_backend",
+    "create_gemini_backend",
+    "create_claude_backend",
+    "create_strategist_backend",
+    "create_claude_interactive_backend",  # Deprecated
+]
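For orientation, a minimal usage sketch of the `create_cli_backend` factory follows. The `CLICommandConfig` and `RetrySettings` constructor fields shown (`command`, `input_mode`, `flags`, `max_attempts`, `base_delay`) are assumptions inferred from calls elsewhere in this diff; the authoritative schema lives in `multi_model_debate/config.py`, which is not reproduced here.

```python
# Hypothetical usage sketch -- config field names are inferred from this
# diff, not taken from the actual config.py schema.
from multi_model_debate.config import CLICommandConfig, RetrySettings
from multi_model_debate.models import create_cli_backend

cli = CLICommandConfig(command="gemini", input_mode="positional", flags=[])
retries = RetrySettings(max_attempts=3, base_delay=30)

backend = create_cli_backend(
    name="Gemini",
    cli_config=cli,
    retry_config=retries,
)
print(backend.name)  # "GEMINI" -- the factory upper-cases the name
```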
multi_model_debate/models/claude.py
@@ -0,0 +1,168 @@
+"""Claude CLI model backend with Strategist support."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from rich.console import Console
+
+from multi_model_debate.models.cli_wrapper import CLIModelBackend
+
+if TYPE_CHECKING:
+    from multi_model_debate.config import CLICommandConfig, RetrySettings
+
+console = Console()
+
+
+class ClaudeBackend(CLIModelBackend):
+    """Claude CLI backend for non-interactive invocations.
+
+    Used for Phase 3 (judge) where Claude makes a one-shot determination.
+    """
+
+    def __init__(
+        self,
+        cli_config: CLICommandConfig,
+        retry_config: RetrySettings,
+        min_response_length: int = 100,
+        error_log: Path | None = None,
+    ) -> None:
+        """Initialize the Claude backend.
+
+        Args:
+            cli_config: CLI command configuration.
+            retry_config: Retry settings for exponential backoff.
+            min_response_length: Minimum chars for valid response.
+            error_log: Optional path to log stderr output.
+        """
+        super().__init__(
+            name="Claude",
+            cli_config=cli_config,
+            retry_config=retry_config,
+            min_response_length=min_response_length,
+            error_log=error_log,
+        )
+
+
+class StrategistBackend(CLIModelBackend):
+    """CLI-based backend for Strategist in Phases 5-6.
+
+    The Strategist is the AI assistant with full context that authored the
+    game plan. It defends the proposal automatically using CLI invocation,
+    just like other model backends.
+
+    DESIGN: Fully automated via CLI calls, same as other model backends.
+    """
+
+    def __init__(
+        self,
+        cli_config: CLICommandConfig,
+        retry_config: RetrySettings,
+        min_response_length: int = 100,
+        error_log: Path | None = None,
+        default_timeout: int = 300,
+    ) -> None:
+        """Initialize the Strategist backend.
+
+        Args:
+            cli_config: CLI command configuration (uses claude CLI).
+            retry_config: Retry settings for exponential backoff.
+            min_response_length: Minimum chars for valid response.
+            error_log: Optional path to log stderr output.
+            default_timeout: Default timeout if not specified in cli_config.
+        """
+        super().__init__(
+            name="Strategist",
+            cli_config=cli_config,
+            retry_config=retry_config,
+            min_response_length=min_response_length,
+            error_log=error_log,
+            default_timeout=default_timeout,
+        )
+
+
+def create_claude_backend(
+    cli_config: CLICommandConfig,
+    retry_config: RetrySettings,
+    min_response_length: int = 100,
+    error_log: Path | None = None,
+) -> ClaudeBackend:
+    """Factory function to create a non-interactive Claude backend.
+
+    Args:
+        cli_config: CLI command configuration.
+        retry_config: Retry settings.
+        min_response_length: Minimum response length.
+        error_log: Optional error log path.
+
+    Returns:
+        Configured ClaudeBackend instance.
+    """
+    return ClaudeBackend(
+        cli_config=cli_config,
+        retry_config=retry_config,
+        min_response_length=min_response_length,
+        error_log=error_log,
+    )
+
+
+def create_strategist_backend(
+    cli_config: CLICommandConfig,
+    retry_config: RetrySettings,
+    min_response_length: int = 100,
+    error_log: Path | None = None,
+    default_timeout: int = 300,
+) -> StrategistBackend:
+    """Factory function to create a Strategist backend.
+
+    The Strategist uses the configured CLI for automated responses.
+    See REQUIREMENTS_V2.md Section 4 for rationale on full automation.
+
+    Args:
+        cli_config: CLI command configuration.
+        retry_config: Retry settings for exponential backoff.
+        min_response_length: Minimum response length.
+        error_log: Optional error log path.
+        default_timeout: Default timeout if not specified in cli_config.
+
+    Returns:
+        Configured StrategistBackend instance.
+    """
+    return StrategistBackend(
+        cli_config=cli_config,
+        retry_config=retry_config,
+        min_response_length=min_response_length,
+        error_log=error_log,
+        default_timeout=default_timeout,
+    )
+
+
+# Backwards compatibility alias (deprecated)
+def create_claude_interactive_backend(
+    min_response_length: int = 100,
+) -> StrategistBackend:
+    """Deprecated: Use create_strategist_backend instead.
+
+    This function is kept for backwards compatibility but will be removed.
+    It creates a StrategistBackend with default CLI config.
+    """
+    import warnings
+
+    from multi_model_debate.config import CLICommandConfig, RetrySettings
+
+    warnings.warn(
+        "create_claude_interactive_backend is deprecated, use create_strategist_backend",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    # Create with defaults - caller should use create_strategist_backend instead
+    default_cli = CLICommandConfig(
+        command="claude", input_mode="positional", flags=["-p", "--tools", "", "--"]
+    )
+    default_retry = RetrySettings(max_attempts=3, base_delay=30)
+    return StrategistBackend(
+        cli_config=default_cli,
+        retry_config=default_retry,
+        min_response_length=min_response_length,
+    )
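The deprecated alias above still returns a working `StrategistBackend` while emitting a `DeprecationWarning`; a short sketch, assuming the package is importable:

```python
# Sketch: the deprecated alias warns but keeps working.
import warnings

from multi_model_debate.models import create_claude_interactive_backend

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    backend = create_claude_interactive_backend()

assert any(issubclass(w.category, DeprecationWarning) for w in caught)
print(backend.name)  # "Strategist"
```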
multi_model_debate/models/cli_wrapper.py
@@ -0,0 +1,233 @@
+"""Generic CLI subprocess wrapper with retry logic."""
+
+from __future__ import annotations
+
+import subprocess
+import sys
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from multi_model_debate.exceptions import (
+    ModelError,
+    ModelTimeoutError,
+    ModelValidationError,
+)
+from multi_model_debate.response_parser import is_valid_response
+
+if TYPE_CHECKING:
+    from multi_model_debate.config import CLICommandConfig, RetrySettings
+
+
+@dataclass
+class CLIResult:
+    """Result of a CLI invocation."""
+
+    stdout: str
+    stderr: str
+    return_code: int
+
+
+class CLIModelBackend:
+    """Generic wrapper for CLI-based model invocations.
+
+    Implements the ModelBackend protocol by invoking external CLI tools
+    via subprocess with retry logic and response validation.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        cli_config: CLICommandConfig,
+        retry_config: RetrySettings,
+        min_response_length: int = 100,
+        error_log: Path | None = None,
+        default_timeout: int = 300,
+    ) -> None:
+        """Initialize the CLI model backend.
+
+        Args:
+            name: Human-readable model name (e.g., 'GPT', 'Gemini').
+            cli_config: CLI command configuration.
+            retry_config: Retry settings for exponential backoff.
+            min_response_length: Minimum chars for valid response.
+            error_log: Optional path to log stderr output.
+            default_timeout: Default timeout if not specified in cli_config or call.
+        """
+        self._name = name
+        self.cli_config = cli_config
+        self.retry_config = retry_config
+        self.min_response_length = min_response_length
+        self.error_log = error_log
+        # Per-model timeout from config, or global default
+        self.default_timeout = cli_config.timeout or default_timeout
+
+    @property
+    def name(self) -> str:
+        """Human-readable model name."""
+        return self._name
+
+    def generate(self, prompt: str, timeout: int | None = None) -> str:
+        """Execute the CLI with retry logic.
+
+        Args:
+            prompt: The input prompt.
+            timeout: Maximum time per attempt in seconds. If not specified,
+                uses the model's configured timeout.
+
+        Returns:
+            The validated response text.
+
+        Raises:
+            ModelError: If all retry attempts fail.
+            ModelTimeoutError: If the timeout is exceeded.
+            ModelValidationError: If the response fails validation.
+        """
+        # Use per-model timeout if not explicitly provided
+        if timeout is None:
+            timeout = self.default_timeout
+
+        last_error: Exception | None = None
+
+        for attempt in range(self.retry_config.max_attempts):
+            try:
+                result = self._execute(prompt, timeout)
+                response = result.stdout
+                self._validate_response(response)
+                return response
+
+            except subprocess.TimeoutExpired as e:
+                last_error = ModelTimeoutError(f"{self.name} timed out after {timeout}s")
+                self._log_error(f"Attempt {attempt + 1}: Timeout - {e}")
+
+            except ModelError as e:
+                last_error = e
+                self._log_error(f"Attempt {attempt + 1}: {e}")
+
+            # Exponential backoff before retry
+            if attempt < self.retry_config.max_attempts - 1:
+                delay = self.retry_config.base_delay * (2**attempt)
+                time.sleep(delay)
+
+        raise ModelError(
+            f"{self.name} failed after {self.retry_config.max_attempts} attempts"
+        ) from last_error
+
+    def _execute(self, prompt: str, timeout: int) -> CLIResult:
+        """Execute the CLI command.
+
+        Args:
+            prompt: The input prompt.
+            timeout: Maximum time to wait in seconds.
+
+        Returns:
+            CLIResult with stdout, stderr, and return code.
+
+        Raises:
+            ModelError: If the command returns a non-zero exit code.
+            subprocess.TimeoutExpired: If the timeout is exceeded.
+        """
+        cmd = self._build_command(prompt)
+
+        if self.cli_config.input_mode == "stdin":
+            result = subprocess.run(
+                cmd,
+                input=prompt,
+                capture_output=True,
+                text=True,
+                timeout=timeout,
+            )
+        else:
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=timeout,
+            )
+
+        cli_result = CLIResult(
+            stdout=result.stdout,
+            stderr=result.stderr,
+            return_code=result.returncode,
+        )
+
+        # Log stderr if present
+        if result.stderr:
+            self._log_error(f"stderr: {result.stderr}")
+
+        if result.returncode != 0:
+            raise ModelError(f"{self.name} returned exit code {result.returncode}: {result.stderr}")
+
+        return cli_result
+
+    def _build_command(self, prompt: str) -> list[str]:
+        """Build the command list for subprocess.
+
+        Args:
+            prompt: The input prompt (used for positional mode).
+
+        Returns:
+            List of command arguments.
+        """
+        cmd = [self.cli_config.command]
+
+        if self.cli_config.subcommand:
+            cmd.append(self.cli_config.subcommand)
+
+        if self.cli_config.flags:
+            cmd.extend(self.cli_config.flags)
+
+        if self.cli_config.input_mode == "stdin":
+            cmd.append("-")
+        else:
+            cmd.append(prompt)
+
+        return cmd
+
+    def _validate_response(self, response: str) -> None:
+        """Validate that the response meets quality criteria.
+
+        Uses JSON-aware validation that accepts:
+        - Valid JSON responses (any length)
+        - Legacy "NO NEW ISSUES" format (backwards compatibility)
+        - Non-JSON responses meeting the minimum length
+
+        See REQUIREMENTS_V2.md Section 6 for structured output rationale.
+
+        Args:
+            response: The response text to validate.
+
+        Raises:
+            ModelValidationError: If validation fails.
+        """
+        if not response:
+            raise ModelValidationError(f"{self.name} returned empty response")
+
+        # Use JSON-aware validation from response_parser
+        if is_valid_response(response, self.min_response_length):
+            return
+
+        # Provide detailed error with response preview for debugging
+        preview = response[:200] + "..." if len(response) > 200 else response
+        raise ModelValidationError(
+            f"{self.name} response too short ({len(response)} chars, "
+            f"min {self.min_response_length}). Preview: {preview!r}"
+        )
+
+    def _log_error(self, message: str) -> None:
+        """Log an error message to stderr and optionally to a file.
+
+        Always logs to stderr for visibility during debugging.
+        Also logs to the error_log file if configured.
+
+        Args:
+            message: The error message to log.
+        """
+        # Always log to stderr for debugging visibility
+        print(f"[{self.name}] {message}", file=sys.stderr)
+
+        # Also log to file if configured
+        if self.error_log:
+            with open(self.error_log, "a") as f:
+                f.write(f"[{self.name}] {message}\n")
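To make the retry and command-construction behavior concrete, here is a hedged sketch that points `CLIModelBackend` at `echo` instead of a real model CLI. It assumes the `CLICommandConfig` and `RetrySettings` fields used elsewhere in this diff, and that `is_valid_response` accepts any non-JSON text meeting the minimum length:

```python
# Sketch: exercising CLIModelBackend with `echo` as a stand-in CLI.
# CLICommandConfig/RetrySettings field names are inferred from this diff.
from multi_model_debate.config import CLICommandConfig, RetrySettings
from multi_model_debate.models.cli_wrapper import CLIModelBackend

backend = CLIModelBackend(
    name="Echo",
    cli_config=CLICommandConfig(command="echo", input_mode="positional", flags=[]),
    retry_config=RetrySettings(max_attempts=3, base_delay=30),
    min_response_length=1,  # echo output is short; relax validation
)

# Positional mode builds ["echo", "hello world"]; stdin mode would build
# ["echo", "-"] and pipe the prompt via stdin.
print(backend.generate("hello world"))  # "hello world\n"
```

On failure, `generate` sleeps `base_delay * 2**attempt` seconds between attempts (30s, then 60s with the settings above) and skips the sleep after the final attempt.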
multi_model_debate/models/gemini.py
@@ -0,0 +1,66 @@
+"""Google Gemini CLI model backend."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from multi_model_debate.models.cli_wrapper import CLIModelBackend
+
+if TYPE_CHECKING:
+    from multi_model_debate.config import CLICommandConfig, RetrySettings
+
+
+class GeminiBackend(CLIModelBackend):
+    """Gemini CLI backend for Google model invocations.
+
+    Wraps the `gemini <prompt>` command, which takes the prompt as a positional argument.
+    """
+
+    def __init__(
+        self,
+        cli_config: CLICommandConfig,
+        retry_config: RetrySettings,
+        min_response_length: int = 100,
+        error_log: Path | None = None,
+    ) -> None:
+        """Initialize the Gemini backend.
+
+        Args:
+            cli_config: CLI command configuration.
+            retry_config: Retry settings for exponential backoff.
+            min_response_length: Minimum chars for valid response.
+            error_log: Optional path to log stderr output.
+        """
+        super().__init__(
+            name="Gemini",
+            cli_config=cli_config,
+            retry_config=retry_config,
+            min_response_length=min_response_length,
+            error_log=error_log,
+        )
+
+
+def create_gemini_backend(
+    cli_config: CLICommandConfig,
+    retry_config: RetrySettings,
+    min_response_length: int = 100,
+    error_log: Path | None = None,
+) -> GeminiBackend:
+    """Factory function to create a Gemini backend.
+
+    Args:
+        cli_config: CLI command configuration.
+        retry_config: Retry settings.
+        min_response_length: Minimum response length.
+        error_log: Optional error log path.
+
+    Returns:
+        Configured GeminiBackend instance.
+    """
+    return GeminiBackend(
+        cli_config=cli_config,
+        retry_config=retry_config,
+        min_response_length=min_response_length,
+        error_log=error_log,
+    )
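Given the base class's `_build_command`, a positional-mode Gemini config produces a command list of the form `["gemini", "<prompt>"]`. An illustrative sketch (flag values are placeholders, not the package's shipped defaults):

```python
# Illustrative only: inspecting the command list the base class builds
# for a positional-mode config. Field names are inferred from this diff.
from multi_model_debate.config import CLICommandConfig, RetrySettings
from multi_model_debate.models.gemini import create_gemini_backend

backend = create_gemini_backend(
    cli_config=CLICommandConfig(command="gemini", input_mode="positional", flags=[]),
    retry_config=RetrySettings(max_attempts=3, base_delay=30),
)
print(backend._build_command("Summarize this plan"))
# ['gemini', 'Summarize this plan']
```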
multi_model_debate/models/openai.py
@@ -0,0 +1,66 @@
+"""OpenAI/Codex CLI model backend."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from multi_model_debate.models.cli_wrapper import CLIModelBackend
+
+if TYPE_CHECKING:
+    from multi_model_debate.config import CLICommandConfig, RetrySettings
+
+
+class CodexBackend(CLIModelBackend):
+    """Codex CLI backend for GPT model invocations.
+
+    Wraps the `codex exec -` command, which reads the prompt from stdin.
+    """
+
+    def __init__(
+        self,
+        cli_config: CLICommandConfig,
+        retry_config: RetrySettings,
+        min_response_length: int = 100,
+        error_log: Path | None = None,
+    ) -> None:
+        """Initialize the Codex backend.
+
+        Args:
+            cli_config: CLI command configuration (typically codex exec).
+            retry_config: Retry settings for exponential backoff.
+            min_response_length: Minimum chars for valid response.
+            error_log: Optional path to log stderr output.
+        """
+        super().__init__(
+            name="GPT",
+            cli_config=cli_config,
+            retry_config=retry_config,
+            min_response_length=min_response_length,
+            error_log=error_log,
+        )
+
+
+def create_codex_backend(
+    cli_config: CLICommandConfig,
+    retry_config: RetrySettings,
+    min_response_length: int = 100,
+    error_log: Path | None = None,
+) -> CodexBackend:
+    """Factory function to create a Codex backend.
+
+    Args:
+        cli_config: CLI command configuration.
+        retry_config: Retry settings.
+        min_response_length: Minimum response length.
+        error_log: Optional error log path.
+
+    Returns:
+        Configured CodexBackend instance.
+    """
+    return CodexBackend(
+        cli_config=cli_config,
+        retry_config=retry_config,
+        min_response_length=min_response_length,
+        error_log=error_log,
+    )
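For contrast with Gemini's positional mode, a stdin-mode Codex config ends the argument list with `-` and pipes the prompt through stdin, per `_build_command` and `_execute` in `cli_wrapper.py`. A sketch under the same config-field assumptions:

```python
# Illustrative only: stdin-mode command construction for the Codex backend.
from multi_model_debate.config import CLICommandConfig, RetrySettings
from multi_model_debate.models.openai import create_codex_backend

backend = create_codex_backend(
    cli_config=CLICommandConfig(
        command="codex", subcommand="exec", input_mode="stdin", flags=[]
    ),
    retry_config=RetrySettings(max_attempts=3, base_delay=30),
)
print(backend._build_command("Review this plan"))
# ['codex', 'exec', '-']  -- the prompt itself travels over stdin
```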
multi_model_debate/models/protocols.py
@@ -0,0 +1,35 @@
+"""Protocol definitions for model backends."""
+
+from typing import Protocol, runtime_checkable
+
+
+@runtime_checkable
+class ModelBackend(Protocol):
+    """Protocol for model backends that can generate responses.
+
+    All models (including Strategist) implement this protocol using
+    CLI-based invocation. See REQUIREMENTS_V2.md Section 4.
+    """
+
+    @property
+    def name(self) -> str:
+        """Human-readable model name (e.g., 'GPT', 'Gemini', 'Strategist')."""
+        ...
+
+    def generate(self, prompt: str, timeout: int | None = None) -> str:
+        """Generate a response from the model.
+
+        Args:
+            prompt: The input prompt.
+            timeout: Maximum time to wait in seconds. If not specified,
+                uses the model's configured default timeout.
+
+        Returns:
+            The model's response text.
+
+        Raises:
+            ModelError: If generation fails after retries.
+            ModelTimeoutError: If timeout is exceeded.
+            ModelValidationError: If response fails validation.
+        """
+        ...