delimit-cli 3.4.0 → 3.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/delimit-setup.js +23 -0
- package/gateway/ai/backends/tools_data.py +830 -0
- package/gateway/ai/backends/tools_design.py +921 -0
- package/gateway/ai/backends/tools_infra.py +866 -0
- package/gateway/ai/backends/tools_real.py +766 -0
- package/gateway/ai/backends/ui_bridge.py +26 -49
- package/gateway/ai/deliberation.py +387 -0
- package/gateway/ai/ledger_manager.py +207 -0
- package/gateway/ai/server.py +630 -216
- package/package.json +1 -1
|
@@ -42,53 +42,35 @@ def _call(pkg: str, factory_name: str, method: str, args: Dict, tool_label: str)
|
|
|
42
42
|
return {"tool": tool_label, "error": str(e)}
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
# ─── DesignSystem (
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def design_generate_component(component_name: str, figma_node_id: Optional[str] = None, output_path: Optional[str] = None) -> Dict[str, Any]:
|
|
58
|
-
return {"tool": "design.generate_component", "component_name": component_name, "status": "pass-through"}
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
def design_generate_tailwind(figma_file_key: str, output_path: Optional[str] = None) -> Dict[str, Any]:
|
|
62
|
-
return {"tool": "design.generate_tailwind", "figma_file_key": figma_file_key, "status": "pass-through"}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
def design_component_library(project_path: str, output_format: str = "json") -> Dict[str, Any]:
|
|
66
|
-
return {"tool": "design.component_library", "project_path": project_path, "status": "pass-through"}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
# ─── Storybook (custom classes, no BaseMCPServer) ─────────────────────
|
|
70
|
-
|
|
71
|
-
def story_generate(component_path: str, story_name: Optional[str] = None, variants: Optional[List[str]] = None) -> Dict[str, Any]:
|
|
72
|
-
return {"tool": "story.generate", "component_path": component_path, "status": "pass-through"}
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def story_visual_test(url: str, project_path: Optional[str] = None, threshold: float = 0.05) -> Dict[str, Any]:
|
|
76
|
-
return {"tool": "story.visual_test", "url": url, "status": "pass-through"}
|
|
45
|
+
# ─── DesignSystem (real implementations in tools_design.py) ────────────
|
|
46
|
+
from .tools_design import (
|
|
47
|
+
design_extract_tokens,
|
|
48
|
+
design_generate_component,
|
|
49
|
+
design_generate_tailwind,
|
|
50
|
+
design_validate_responsive,
|
|
51
|
+
design_component_library,
|
|
52
|
+
story_generate,
|
|
53
|
+
story_visual_test,
|
|
54
|
+
story_accessibility,
|
|
55
|
+
)
|
|
77
56
|
|
|
78
57
|
|
|
79
58
|
def story_build(project_path: str, output_dir: Optional[str] = None) -> Dict[str, Any]:
    """Stubbed Storybook build — a real build needs Storybook installed locally."""
    payload = {
        "tool": "story.build",
        "project_path": project_path,
        "status": "not_available",
    }
    payload["message"] = "Storybook build requires Storybook installed. Run: npx storybook init"
    return payload
|
|
81
62
|
|
|
82
63
|
|
|
83
64
|
def story_accessibility_test(project_path: str, standards: str = "WCAG2AA") -> Dict[str, Any]:
    """Backward-compatible alias: forwards unchanged to story_accessibility."""
    result = story_accessibility(project_path=project_path, standards=standards)
    return result
|
|
85
67
|
|
|
86
68
|
|
|
87
|
-
# ─── TestSmith (
|
|
69
|
+
# ─── TestSmith (Real implementations — tools_real.py) ─────────────────
|
|
88
70
|
|
|
89
71
|
def test_generate(project_path: str, source_files: Optional[List[str]] = None, framework: str = "jest") -> Dict[str, Any]:
    """Facade: delegate test generation to the real implementation in tools_real."""
    from .tools_real import test_generate as _impl
    return _impl(project_path=project_path, source_files=source_files, framework=framework)
|
|
92
74
|
|
|
93
75
|
|
|
94
76
|
def test_coverage(project_path: str, threshold: int = 80) -> Dict[str, Any]:
|
|
@@ -97,22 +79,17 @@ def test_coverage(project_path: str, threshold: int = 80) -> Dict[str, Any]:
|
|
|
97
79
|
|
|
98
80
|
|
|
99
81
|
def test_smoke(project_path: str, test_suite: Optional[str] = None) -> Dict[str, Any]:
    """Facade: run smoke tests via the real implementation in tools_real."""
    from .tools_real import test_smoke as _impl
    return _impl(project_path=project_path, test_suite=test_suite)
|
|
107
84
|
|
|
108
85
|
|
|
109
|
-
# ─── DocsWeaver (
|
|
86
|
+
# ─── DocsWeaver (Real implementations — tools_real.py) ────────────────
|
|
110
87
|
|
|
111
88
|
def docs_generate(target: str = ".", options: Optional[Dict] = None) -> Dict[str, Any]:
    """Facade: delegate documentation generation to tools_real."""
    from .tools_real import docs_generate as _impl
    return _impl(target=target, options=options)
|
|
114
91
|
|
|
115
92
|
|
|
116
93
|
def docs_validate(target: str = ".", options: Optional[Dict] = None) -> Dict[str, Any]:
    """Facade: delegate documentation validation to tools_real."""
    from .tools_real import docs_validate as _impl
    return _impl(target=target, options=options)
|
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Delimit Deliberation Engine — Multi-round consensus with real model-to-model debate.
|
|
3
|
+
|
|
4
|
+
Passes each model's EXACT raw response to the other models for counter-arguments.
|
|
5
|
+
Rounds continue until unanimous agreement or max rounds reached.
|
|
6
|
+
|
|
7
|
+
Models are configured via ~/.delimit/models.json — users choose which AI models
|
|
8
|
+
to include in deliberations. Supports any OpenAI-compatible API.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
import shutil
|
|
15
|
+
import time
|
|
16
|
+
import urllib.request
|
|
17
|
+
import urllib.error
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any, Dict, List, Optional
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger("delimit.deliberation")
|
|
22
|
+
|
|
23
|
+
# Per-user state lives under ~/.delimit: saved transcripts and model config.
DELIBERATION_DIR = Path.home() / ".delimit" / "deliberations"
MODELS_CONFIG = Path.home() / ".delimit" / "models.json"

# Built-in model registry used when ~/.delimit/models.json is absent.
# Each entry carries: display name, endpoint/"format" selecting the wire
# protocol, model id, the env var supplying credentials, and an "enabled"
# flag that get_models_config flips on when credentials are detected.
DEFAULT_MODELS = {
    "grok": {
        "name": "Grok 4",
        "api_url": "https://api.x.ai/v1/chat/completions",
        "model": "grok-4-0709",
        "env_key": "XAI_API_KEY",
        "enabled": False,
    },
    "gemini": {
        "name": "Gemini 2.5 Flash",
        "api_url": "https://us-central1-aiplatform.googleapis.com/v1/projects/{project}/locations/us-central1/publishers/google/models/gemini-2.5-flash:generateContent",
        "model": "gemini-2.5-flash",
        "env_key": "GOOGLE_APPLICATION_CREDENTIALS",
        "enabled": False,
        "format": "vertex_ai",  # Vertex AI request/response shape, not OpenAI-compatible
    },
    "codex": {
        "name": "Codex (GPT-5.4)",
        "format": "codex_cli",  # invoked via the local `codex` CLI, not HTTP
        "model": "gpt-5.4",
        "env_key": "CODEX_CLI",
        "enabled": False,
    },
}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_models_config() -> Dict[str, Any]:
    """Load model configuration, auto-detecting available credentials.

    Prefers the user's ~/.delimit/models.json; when that file is missing or
    unreadable, falls back to DEFAULT_MODELS and enables each model whose
    credentials are present (API-key env var, or the `codex` CLI on PATH).

    Returns:
        Mapping of model id -> config dict (name, endpoint, env_key,
        enabled flag, plus "api_key"/"codex_path" for detected models).
    """
    if MODELS_CONFIG.exists():
        try:
            return json.loads(MODELS_CONFIG.read_text())
        except (OSError, ValueError) as e:
            # Fall through to auto-detection, but don't hide a broken file
            # the way a bare `pass` would.
            logger.warning("Could not read %s (%s); using auto-detected defaults", MODELS_CONFIG, e)

    # Auto-detect from environment
    config = {}
    for model_id, defaults in DEFAULT_MODELS.items():
        if defaults.get("format") == "codex_cli":
            # Codex: enabled only when the CLI binary is on PATH.
            # (shutil is already imported at module level — no local import.)
            codex_path = shutil.which("codex")
            config[model_id] = {
                **defaults,
                "enabled": codex_path is not None,
                "codex_path": codex_path or "",
            }
        else:
            # HTTP-backed models: enabled when the API-key env var is set.
            key = os.environ.get(defaults["env_key"], "")
            config[model_id] = {
                **defaults,
                "api_key": key,
                "enabled": bool(key),
            }

    return config
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def configure_models() -> Dict[str, Any]:
    """Report which deliberation models are ready and which still need keys."""
    config = get_models_config()

    # Partition in a single pass instead of two comprehensions.
    enabled = {}
    disabled = {}
    for model_id, entry in config.items():
        (enabled if entry.get("enabled") else disabled)[model_id] = entry

    return {
        "configured_models": list(enabled.keys()),
        "missing_models": {mid: f"Set {entry['env_key']} environment variable" for mid, entry in disabled.items()},
        "config_path": str(MODELS_CONFIG),
        "note": "Add API keys to enable more models for deliberation. "
                "Set env vars or create ~/.delimit/models.json",
    }
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _call_codex(prompt: str, system_prompt: str = "") -> str:
|
|
99
|
+
"""Call Codex via CLI subprocess."""
|
|
100
|
+
import subprocess
|
|
101
|
+
codex_path = shutil.which("codex")
|
|
102
|
+
if not codex_path:
|
|
103
|
+
return "[Codex unavailable — codex CLI not found in PATH]"
|
|
104
|
+
|
|
105
|
+
full_prompt = f"{system_prompt}\n\n{prompt}" if system_prompt else prompt
|
|
106
|
+
try:
|
|
107
|
+
result = subprocess.run(
|
|
108
|
+
[codex_path, "exec", "--dangerously-bypass-approvals-and-sandbox", full_prompt],
|
|
109
|
+
capture_output=True,
|
|
110
|
+
text=True,
|
|
111
|
+
timeout=120,
|
|
112
|
+
)
|
|
113
|
+
output = result.stdout.strip()
|
|
114
|
+
if not output and result.stderr:
|
|
115
|
+
return f"[Codex error: {result.stderr[:300]}]"
|
|
116
|
+
return output or "[Codex returned empty response]"
|
|
117
|
+
except subprocess.TimeoutExpired:
|
|
118
|
+
return "[Codex timed out after 120s]"
|
|
119
|
+
except Exception as e:
|
|
120
|
+
return f"[Codex error: {e}]"
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _call_model(model_id: str, config: Dict, prompt: str, system_prompt: str = "") -> str:
|
|
124
|
+
"""Call any supported model — OpenAI-compatible API, Vertex AI, or Codex CLI."""
|
|
125
|
+
fmt = config.get("format", "openai")
|
|
126
|
+
|
|
127
|
+
# Codex uses CLI, not HTTP API
|
|
128
|
+
if fmt == "codex_cli":
|
|
129
|
+
return _call_codex(prompt, system_prompt)
|
|
130
|
+
|
|
131
|
+
api_key = config.get("api_key") or os.environ.get(config.get("env_key", ""), "")
|
|
132
|
+
# Vertex AI uses service account auth, not API key
|
|
133
|
+
if not api_key and fmt != "vertex_ai":
|
|
134
|
+
return f"[{config.get('name', model_id)} unavailable — {config.get('env_key')} not set]"
|
|
135
|
+
|
|
136
|
+
api_url = config["api_url"]
|
|
137
|
+
model = config.get("model", "")
|
|
138
|
+
|
|
139
|
+
try:
|
|
140
|
+
if fmt == "vertex_ai":
|
|
141
|
+
# Vertex AI format — use google-auth for access token
|
|
142
|
+
try:
|
|
143
|
+
import google.auth
|
|
144
|
+
import google.auth.transport.requests
|
|
145
|
+
# Explicitly set credentials path if not in env
|
|
146
|
+
creds_path = "/root/.config/gcloud/application_default_credentials.json"
|
|
147
|
+
if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") and os.path.exists(creds_path):
|
|
148
|
+
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = creds_path
|
|
149
|
+
creds, project = google.auth.default()
|
|
150
|
+
creds.refresh(google.auth.transport.requests.Request())
|
|
151
|
+
actual_url = api_url.replace("{project}", project or os.environ.get("GOOGLE_CLOUD_PROJECT", "jamsons"))
|
|
152
|
+
data = json.dumps({
|
|
153
|
+
"contents": [{"role": "user", "parts": [{"text": f"{system_prompt}\n\n{prompt}" if system_prompt else prompt}]}],
|
|
154
|
+
"generationConfig": {"maxOutputTokens": 4096, "temperature": 0.7},
|
|
155
|
+
}).encode()
|
|
156
|
+
req = urllib.request.Request(
|
|
157
|
+
actual_url,
|
|
158
|
+
data=data,
|
|
159
|
+
headers={
|
|
160
|
+
"Authorization": f"Bearer {creds.token}",
|
|
161
|
+
"Content-Type": "application/json",
|
|
162
|
+
},
|
|
163
|
+
method="POST",
|
|
164
|
+
)
|
|
165
|
+
except ImportError:
|
|
166
|
+
return f"[Gemini unavailable — install google-auth: pip install google-auth]"
|
|
167
|
+
elif fmt == "google":
|
|
168
|
+
# Google Generative AI format (API key)
|
|
169
|
+
data = json.dumps({
|
|
170
|
+
"contents": [{"role": "user", "parts": [{"text": f"{system_prompt}\n\n{prompt}" if system_prompt else prompt}]}],
|
|
171
|
+
"generationConfig": {"maxOutputTokens": 4096, "temperature": 0.7},
|
|
172
|
+
}).encode()
|
|
173
|
+
req = urllib.request.Request(
|
|
174
|
+
f"{api_url}?key={api_key}",
|
|
175
|
+
data=data,
|
|
176
|
+
headers={"Content-Type": "application/json"},
|
|
177
|
+
method="POST",
|
|
178
|
+
)
|
|
179
|
+
else:
|
|
180
|
+
# OpenAI-compatible format (works for xAI, OpenAI, etc.)
|
|
181
|
+
messages = []
|
|
182
|
+
if system_prompt:
|
|
183
|
+
messages.append({"role": "system", "content": system_prompt})
|
|
184
|
+
messages.append({"role": "user", "content": prompt})
|
|
185
|
+
|
|
186
|
+
data = json.dumps({
|
|
187
|
+
"model": model,
|
|
188
|
+
"messages": messages,
|
|
189
|
+
"temperature": 0.7,
|
|
190
|
+
"max_tokens": 4096,
|
|
191
|
+
}).encode()
|
|
192
|
+
req = urllib.request.Request(
|
|
193
|
+
api_url,
|
|
194
|
+
data=data,
|
|
195
|
+
headers={
|
|
196
|
+
"Authorization": f"Bearer {api_key}",
|
|
197
|
+
"Content-Type": "application/json",
|
|
198
|
+
"User-Agent": "Delimit/3.3.0",
|
|
199
|
+
},
|
|
200
|
+
method="POST",
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
with urllib.request.urlopen(req, timeout=120) as resp:
|
|
204
|
+
result = json.loads(resp.read())
|
|
205
|
+
|
|
206
|
+
if fmt in ("google", "vertex_ai"):
|
|
207
|
+
return result["candidates"][0]["content"]["parts"][0]["text"]
|
|
208
|
+
else:
|
|
209
|
+
return result["choices"][0]["message"]["content"]
|
|
210
|
+
|
|
211
|
+
except Exception as e:
|
|
212
|
+
return f"[{config.get('name', model_id)} error: {e}]"
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def deliberate(
    question: str,
    context: str = "",
    max_rounds: int = 3,
    mode: str = "dialogue",
    require_unanimous: bool = True,
    save_path: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Run a multi-round deliberation across all configured AI models.

    Modes:
    - "debate": Long-form essays, models respond to each other's full arguments (3 rounds default)
    - "dialogue": Short conversational turns, models build on each other like a group chat (6 rounds default)

    Args:
        question: The question the models deliberate over.
        context: Optional background text prepended to the question.
        max_rounds: Upper bound on rounds (bumped 3 -> 6 for dialogue mode).
        mode: "dialogue" or "debate"; any other value behaves like "debate".
        require_unanimous: NOTE(review): never read in this body — consensus
            always requires every model's verdict to start with AGREE; confirm intent.
        save_path: Transcript JSON destination; auto-named under
            ~/.delimit/deliberations when omitted.

    Returns the full deliberation transcript + final verdict.
    """
    DELIBERATION_DIR.mkdir(parents=True, exist_ok=True)

    config = get_models_config()
    enabled_models = {k: v for k, v in config.items() if v.get("enabled")}

    # A deliberation needs at least two voices; report what's missing instead of raising.
    if len(enabled_models) < 2:
        return {
            "error": "Need at least 2 AI models for deliberation.",
            "configured": list(enabled_models.keys()),
            "missing": {k: f"Set {v.get('env_key', 'key')}" for k, v in config.items() if not v.get("enabled")},
            "tip": "Set API key environment variables or create ~/.delimit/models.json",
        }

    model_ids = list(enabled_models.keys())

    # Dialogue mode uses more rounds with shorter responses
    if mode == "dialogue" and max_rounds == 3:
        max_rounds = 6

    transcript = {
        "question": question,
        "context": context,
        "mode": mode,
        "models": model_ids,
        "started_at": time.strftime("%Y-%m-%dT%H:%M:%SZ"),
        "rounds": [],
        "thread": [],  # flat conversation thread for dialogue mode
        "unanimous": False,
        "final_verdict": None,
    }

    # System prompt sets tone and the required VERDICT: line format per mode.
    if mode == "dialogue":
        system_prompt = (
            "You are in a group chat with other AI models. Keep responses to 2-4 sentences. "
            "Be direct and conversational — this is a discussion, not an essay. "
            "Build on what others said. Disagree specifically if you disagree. "
            "When you're ready to agree, say VERDICT: AGREE. "
            "If you disagree, say VERDICT: DISAGREE — [why in one sentence]."
        )
    else:
        system_prompt = (
            "You are participating in a structured multi-model deliberation with other AI models. "
            "You will see other models' exact responses and must engage with their specific arguments. "
            "At the END of your response, you MUST include exactly one of these lines:\n"
            "VERDICT: AGREE\n"
            "VERDICT: DISAGREE — [one sentence reason]\n"
            "VERDICT: AGREE WITH MODIFICATIONS — [one sentence modification]\n"
            "Do not hedge. Take a clear position."
        )

    full_prompt = f"{context}\n\nQUESTION:\n{question}" if context else question

    # Round 1: Independent responses
    logger.info(f"Deliberation Round 1 ({mode} mode): Independent responses")
    round1 = {"round": 1, "type": "independent", "responses": {}}

    for model_id in model_ids:
        if mode == "dialogue":
            # Shorter initial prompt for dialogue
            r1_prompt = f"{full_prompt}\n\nGive your initial take in 2-4 sentences. Don't write an essay."
        else:
            r1_prompt = full_prompt
        response = _call_model(model_id, enabled_models[model_id], r1_prompt, system_prompt)
        round1["responses"][model_id] = response
        # Build flat thread
        transcript["thread"].append({"model": model_id, "round": 1, "text": response})
        logger.info(f" {model_id}: {len(response)} chars")

    transcript["rounds"].append(round1)

    # Subsequent rounds: Models see each other's responses
    for round_num in range(2, max_rounds + 1):
        logger.info(f"Deliberation Round {round_num} ({mode})")
        round_data = {"round": round_num, "type": "deliberation", "responses": {}}
        prev = transcript["rounds"][-1]["responses"]

        for model_id in model_ids:
            if mode == "dialogue":
                # Dialogue: show the full conversation thread so far
                thread_text = f"Topic: {question}\n\nConversation so far:\n"
                for entry in transcript["thread"]:
                    name = enabled_models.get(entry["model"], {}).get("name", entry["model"])
                    thread_text += f"\n[{name}]: {entry['text']}\n"
                thread_text += (
                    f"\nYour turn ({enabled_models[model_id]['name']}). "
                    f"Respond in 2-4 sentences to the conversation above. "
                    f"If you agree with the emerging consensus, say VERDICT: AGREE. "
                    f"If not, push back specifically."
                )
                cross_prompt = thread_text
            else:
                # Debate: show other models' full responses from last round
                others_text = ""
                for other_id in model_ids:
                    if other_id != model_id:
                        others_text += (
                            f"\n=== {enabled_models[other_id]['name'].upper()}'S EXACT RESPONSE "
                            f"(Round {round_num - 1}) ===\n"
                            f"{prev[other_id]}\n"
                        )
                cross_prompt = (
                    f"DELIBERATION ROUND {round_num}\n\n"
                    f"Original question: {question}\n"
                    f"{others_text}\n"
                    f"Respond to the other models' SPECIFIC arguments. "
                    f"Quote them directly if you disagree. "
                    f"End with VERDICT: AGREE / DISAGREE / AGREE WITH MODIFICATIONS."
                )

            response = _call_model(model_id, enabled_models[model_id], cross_prompt, system_prompt)
            round_data["responses"][model_id] = response
            transcript["thread"].append({"model": model_id, "round": round_num, "text": response})

        transcript["rounds"].append(round_data)

        # Check for unanimous agreement: every response must carry a VERDICT:
        # line whose text starts with AGREE (case-insensitive via .upper()).
        all_agree = True
        for model_id in model_ids:
            resp = round_data["responses"][model_id].upper()
            if "VERDICT:" in resp:
                verdict_part = resp.split("VERDICT:")[-1].strip()
                agrees = verdict_part.startswith("AGREE")
                if not agrees:
                    all_agree = False
            else:
                all_agree = False  # No verdict = no agreement

        if all_agree:
            transcript["unanimous"] = True
            transcript["final_verdict"] = "UNANIMOUS AGREEMENT"
            transcript["agreed_at_round"] = round_num
            break
    else:
        # for/else: loop exhausted without break — max rounds reached.
        # Record each model's last-round stance on the transcript.
        transcript["final_verdict"] = "MAX ROUNDS REACHED"
        for model_id in model_ids:
            resp = transcript["rounds"][-1]["responses"][model_id].upper()
            verdict = "unknown"
            if "VERDICT:" in resp:
                verdict_part = resp.split("VERDICT:")[-1].strip()
                verdict = "agree" if verdict_part.startswith("AGREE") else "disagree"
            transcript[f"{model_id}_final"] = verdict

    transcript["completed_at"] = time.strftime("%Y-%m-%dT%H:%M:%SZ")

    # Save transcript
    save_to = save_path
    if not save_to:
        ts = time.strftime("%Y%m%d_%H%M%S")
        save_to = str(DELIBERATION_DIR / f"deliberation_{ts}.json")

    Path(save_to).parent.mkdir(parents=True, exist_ok=True)
    Path(save_to).write_text(json.dumps(transcript, indent=2))
    transcript["saved_to"] = save_to

    return transcript
|