frontier_council-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- frontier_council/__init__.py +19 -0
- frontier_council/cli.py +214 -0
- frontier_council/council.py +830 -0
- frontier_council-0.1.0.dist-info/METADATA +150 -0
- frontier_council-0.1.0.dist-info/RECORD +7 -0
- frontier_council-0.1.0.dist-info/WHEEL +4 -0
- frontier_council-0.1.0.dist-info/entry_points.txt +2 -0

frontier_council/__init__.py
ADDED

@@ -0,0 +1,19 @@
"""Frontier Council - Multi-model deliberation for important decisions."""

__version__ = "0.1.0"

from .council import (
    run_council,
    run_blind_phase_parallel,
    detect_social_context,
    COUNCIL,
    JUDGE_MODEL,
)

__all__ = [
    "run_council",
    "run_blind_phase_parallel",
    "detect_social_context",
    "COUNCIL",
    "JUDGE_MODEL",
]
frontier_council/cli.py
ADDED

@@ -0,0 +1,214 @@
"""CLI entry point for frontier-council."""

import argparse
import json
import os
import random
import subprocess
import sys
from datetime import datetime
from pathlib import Path

from .council import (
    COUNCIL,
    detect_social_context,
    run_council,
)


def main():
    parser = argparse.ArgumentParser(
        description="LLM Council - Multi-model deliberation for important decisions",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  frontier-council "Should we use microservices or monolith?"
  frontier-council "What questions should I ask?" --social
  frontier-council "Career decision" --persona "builder who hates process work"
  frontier-council "Architecture choice" --rounds 3 --output transcript.md
        """,
    )
    parser.add_argument("question", help="The question for the council to deliberate")
    parser.add_argument(
        "--rounds",
        type=int,
        default=2,
        help="Number of deliberation rounds (default: 2, exits early on consensus)",
    )
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="Suppress progress output",
    )
    parser.add_argument(
        "--output", "-o",
        help="Save transcript to file",
    )
    parser.add_argument(
        "--named",
        action="store_true",
        help="Show real model names instead of anonymous Speaker 1, 2, etc.",
    )
    parser.add_argument(
        "--no-blind",
        action="store_true",
        help="Skip blind first-pass (faster, but more anchoring bias)",
    )
    parser.add_argument(
        "--context", "-c",
        help="Context hint for the judge (e.g., 'architecture decision', 'ethics question')",
    )
    parser.add_argument(
        "--share",
        action="store_true",
        help="Upload transcript to secret GitHub Gist and print URL",
    )
    parser.add_argument(
        "--social",
        action="store_true",
        help="Enable social calibration mode (for interview questions, outreach, networking)",
    )
    parser.add_argument(
        "--persona", "-p",
        help="Context about the person asking (e.g., 'builder who hates process work')",
    )
    parser.add_argument(
        "--advocate",
        type=int,
        choices=[1, 2, 3, 4, 5],
        help="Which speaker (1-5) should be devil's advocate (default: random)",
    )
    args = parser.parse_args()

    # Auto-detect social context if not explicitly set
    social_mode = args.social or detect_social_context(args.question)
    if social_mode and not args.social and not args.quiet:
        print("(Auto-detected social context - enabling social calibration mode)")
        print()

    # Get API keys
    api_key = os.environ.get("OPENROUTER_API_KEY")
    if not api_key:
        print("Error: OPENROUTER_API_KEY environment variable not set", file=sys.stderr)
        sys.exit(1)

    google_api_key = os.environ.get("GOOGLE_API_KEY")
    moonshot_api_key = os.environ.get("MOONSHOT_API_KEY")

    use_blind = not args.no_blind

    if not args.quiet:
        mode_parts = []
        mode_parts.append("named" if args.named else "anonymous")
        mode_parts.append("blind first-pass" if use_blind else "no blind phase")
        if social_mode:
            mode_parts.append("social calibration")
        print(f"Running LLM Council ({', '.join(mode_parts)})...")
        fallbacks = []
        if google_api_key:
            fallbacks.append("Gemini→AI Studio")
        if moonshot_api_key:
            fallbacks.append("Kimi→Moonshot")
        if fallbacks:
            print(f"(Fallbacks enabled: {', '.join(fallbacks)})")
        print()

    try:
        advocate_idx = (args.advocate - 1) if args.advocate else random.randint(0, len(COUNCIL) - 1)

        if not args.quiet and args.persona:
            print(f"(Persona context: {args.persona})")
            print()
        if not args.quiet:
            advocate_name = COUNCIL[advocate_idx][0]
            print(f"(Devil's advocate: {advocate_name})")
            print()

        transcript, failed_models = run_council(
            question=args.question,
            council_config=COUNCIL,
            api_key=api_key,
            google_api_key=google_api_key,
            moonshot_api_key=moonshot_api_key,
            rounds=args.rounds,
            verbose=not args.quiet,
            anonymous=not args.named,
            blind=use_blind,
            context=args.context,
            social_mode=social_mode,
            persona=args.persona,
            advocate_idx=advocate_idx,
        )

        # Print failure summary
        if failed_models and not args.quiet:
            print()
            print("=" * 60)
            print("⚠️ MODEL FAILURES")
            print("=" * 60)
            for failure in failed_models:
                print(f" • {failure}")
            working_count = len(COUNCIL) - len(set(f.split(":")[0].split(" (")[0] for f in failed_models))
            print(f"\nCouncil ran with {working_count}/{len(COUNCIL)} models")
            print("=" * 60)
            print()

        # Save transcript
        if args.output:
            Path(args.output).write_text(transcript)
            if not args.quiet:
                print(f"Transcript saved to: {args.output}")

        # Share via gist
        gist_url = None
        if args.share:
            try:
                import tempfile
                with tempfile.NamedTemporaryFile(
                    mode='w', suffix='.md', prefix='council-', delete=False
                ) as f:
                    f.write(f"# LLM Council Deliberation\n\n")
                    f.write(f"**Question:** {args.question}\n\n")
                    if args.context:
                        f.write(f"**Context:** {args.context}\n\n")
                    f.write(f"**Date:** {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n---\n\n")
                    f.write(transcript)
                    temp_path = f.name

                result = subprocess.run(
                    ["gh", "gist", "create", temp_path, "--desc", f"LLM Council: {args.question[:50]}"],
                    capture_output=True, text=True
                )
                os.unlink(temp_path)

                if result.returncode == 0:
                    gist_url = result.stdout.strip()
                    print(f"\n🔗 Shared: {gist_url}")
                else:
                    print(f"Gist creation failed: {result.stderr}", file=sys.stderr)
            except FileNotFoundError:
                print("Error: 'gh' CLI not found. Install with: brew install gh", file=sys.stderr)

        # Log to history
        history_file = Path(__file__).parent.parent / "council_history.jsonl"
        log_entry = {
            "timestamp": datetime.now().isoformat(),
            "question": args.question[:200],
            "gist": gist_url,
            "context": args.context,
            "rounds": args.rounds,
            "blind": use_blind,
            "models": [name for name, _, _ in COUNCIL],
        }
        with open(history_file, "a") as f:
            f.write(json.dumps(log_entry) + "\n")

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()

frontier_council/council.py
ADDED

@@ -0,0 +1,830 @@
"""Core council deliberation logic."""

import asyncio
import httpx
import json
import re
from pathlib import Path

OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
GOOGLE_AI_STUDIO_URL = "https://generativelanguage.googleapis.com/v1beta/models"
MOONSHOT_URL = "https://api.moonshot.cn/v1/chat/completions"

# Model configurations (all via OpenRouter, with fallbacks where available)
# Format: (name, openrouter_model, fallback) - fallback is (provider, model) or None
# Providers: "google" = AI Studio, "moonshot" = Moonshot API
COUNCIL = [
    ("Claude", "anthropic/claude-opus-4.5", None),
    ("GPT", "openai/gpt-5.2-pro", None),
    ("Gemini", "google/gemini-3-pro-preview", ("google", "gemini-2.5-pro")),
    ("Grok", "x-ai/grok-4", None),
    ("Kimi", "moonshotai/kimi-k2.5", ("moonshot", "kimi-k2.5")),
]

JUDGE_MODEL = "anthropic/claude-opus-4.5"

# Keywords that suggest social/conversational context (auto-detect)
SOCIAL_KEYWORDS = [
    "interview", "ask him", "ask her", "ask them", "question to ask",
    "networking", "outreach", "message", "email", "linkedin",
    "coffee chat", "informational", "reach out", "follow up",
    "what should i say", "how should i respond", "conversation",
]

# Thinking models don't stream well - use non-streaming for these
THINKING_MODEL_SUFFIXES = {
    "gemini-3-pro-preview",
    "kimi-k2.5",
    "deepseek-r1",
    "o1-preview", "o1-mini", "o1",
    "o3-preview", "o3-mini", "o3",
}


def is_thinking_model(model: str) -> bool:
    """Check if model is a thinking model that doesn't stream well."""
    model_name = model.split("/")[-1].lower()
    return model_name in THINKING_MODEL_SUFFIXES


def detect_social_context(question: str) -> bool:
    """Auto-detect if the question is about social/conversational context."""
    question_lower = question.lower()
    return any(keyword in question_lower for keyword in SOCIAL_KEYWORDS)


def query_model(
    api_key: str,
    model: str,
    messages: list[dict],
    max_tokens: int = 1500,
    timeout: float = 120.0,
    stream: bool = False,
    retries: int = 2,
) -> str:
    """Query a model via OpenRouter with retry logic for flaky models."""
    if is_thinking_model(model):
        max_tokens = max(max_tokens, 4000)
        timeout = max(timeout, 180.0)

    if stream and not is_thinking_model(model):
        result = query_model_streaming(api_key, model, messages, max_tokens, timeout)
        if not result.startswith("["):
            return result
        print("(Streaming failed, retrying without streaming...)", flush=True)

    for attempt in range(retries + 1):
        try:
            response = httpx.post(
                OPENROUTER_URL,
                headers={"Authorization": f"Bearer {api_key}"},
                json={
                    "model": model,
                    "messages": messages,
                    "max_tokens": max_tokens,
                },
                timeout=timeout,
            )
        except (httpx.RequestError, httpx.RemoteProtocolError) as e:
            if attempt < retries:
                continue
            return f"[Error: Connection failed for {model}: {e}]"

        if response.status_code != 200:
            if attempt < retries:
                continue
            return f"[Error: HTTP {response.status_code} from {model}]"

        data = response.json()

        if "error" in data:
            if attempt < retries:
                continue
            return f"[Error: {data['error'].get('message', data['error'])}]"

        if "choices" not in data or not data["choices"]:
            if attempt < retries:
                continue
            return f"[Error: No response from {model}]"

        content = data["choices"][0]["message"]["content"]

        if not content or not content.strip():
            reasoning = data["choices"][0]["message"].get("reasoning", "")
            if reasoning and reasoning.strip():
                if attempt < retries:
                    continue
                return f"[Model still thinking - needs more tokens. Partial reasoning: {reasoning[:150]}...]"
            if attempt < retries:
                continue
            return f"[No response from {model} after {retries + 1} attempts]"

        if "<think>" in content:
            content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL).strip()

        return content

    return f"[Error: Failed to get response from {model}]"


def query_google_ai_studio(
    api_key: str,
    model: str,
    messages: list[dict],
    max_tokens: int = 8192,
    timeout: float = 120.0,
    retries: int = 2,
) -> str:
    """Query Google AI Studio directly (fallback for Gemini models)."""
    contents = []
    system_instruction = None

    for msg in messages:
        role = msg["role"]
        content = msg["content"]

        if role == "system":
            system_instruction = content
        elif role == "user":
            contents.append({"role": "user", "parts": [{"text": content}]})
        elif role == "assistant":
            contents.append({"role": "model", "parts": [{"text": content}]})

    body = {
        "contents": contents,
        "generationConfig": {
            "maxOutputTokens": max_tokens,
        }
    }
    if system_instruction:
        body["systemInstruction"] = {"parts": [{"text": system_instruction}]}

    url = f"{GOOGLE_AI_STUDIO_URL}/{model}:generateContent?key={api_key}"

    for attempt in range(retries + 1):
        try:
            response = httpx.post(url, json=body, timeout=timeout)

            if response.status_code != 200:
                if attempt < retries:
                    continue
                return f"[Error: HTTP {response.status_code} from AI Studio {model}]"

            data = response.json()

            if "error" in data:
                if attempt < retries:
                    continue
                return f"[Error: {data['error'].get('message', data['error'])}]"

            candidates = data.get("candidates", [])
            if not candidates:
                if attempt < retries:
                    continue
                return f"[Error: No candidates from AI Studio {model}]"

            parts = candidates[0].get("content", {}).get("parts", [])
            if not parts:
                if attempt < retries:
                    continue
                return f"[Error: No content from AI Studio {model}]"

            content = parts[0].get("text", "")
            if not content.strip():
                if attempt < retries:
                    continue
                return f"[No response from AI Studio {model} after {retries + 1} attempts]"

            return content

        except httpx.TimeoutException:
            if attempt < retries:
                continue
            return f"[Error: Timeout from AI Studio {model}]"
        except httpx.RequestError as e:
            if attempt < retries:
                continue
            return f"[Error: Request failed for AI Studio {model}: {e}]"

    return f"[Error: Failed to get response from AI Studio {model}]"


def query_moonshot(
    api_key: str,
    model: str,
    messages: list[dict],
    max_tokens: int = 8192,
    timeout: float = 120.0,
    retries: int = 2,
) -> str:
    """Query Moonshot API directly (fallback for Kimi models)."""
    for attempt in range(retries + 1):
        try:
            response = httpx.post(
                MOONSHOT_URL,
                headers={"Authorization": f"Bearer {api_key}"},
                json={
                    "model": model,
                    "messages": messages,
                    "max_tokens": max_tokens,
                },
                timeout=timeout,
            )

            if response.status_code != 200:
                if attempt < retries:
                    continue
                return f"[Error: HTTP {response.status_code} from Moonshot {model}]"

            data = response.json()

            if "error" in data:
                if attempt < retries:
                    continue
                return f"[Error: {data['error'].get('message', data['error'])}]"

            if "choices" not in data or not data["choices"]:
                if attempt < retries:
                    continue
                return f"[Error: No response from Moonshot {model}]"

            content = data["choices"][0]["message"]["content"]

            if not content or not content.strip():
                if attempt < retries:
                    continue
                return f"[No response from Moonshot {model} after {retries + 1} attempts]"

            return content

        except httpx.TimeoutException:
            if attempt < retries:
                continue
            return f"[Error: Timeout from Moonshot {model}]"
        except httpx.RequestError as e:
            if attempt < retries:
                continue
            return f"[Error: Request failed for Moonshot {model}: {e}]"

    return f"[Error: Failed to get response from Moonshot {model}]"


def query_model_streaming(
    api_key: str,
    model: str,
    messages: list[dict],
    max_tokens: int = 1500,
    timeout: float = 120.0,
) -> str:
    """Query a model with streaming output - prints tokens as they arrive."""
    import json as json_module

    full_content = []
    in_think_block = False
    error_msg = None

    try:
        with httpx.stream(
            "POST",
            OPENROUTER_URL,
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "model": model,
                "messages": messages,
                "max_tokens": max_tokens,
                "stream": True,
            },
            timeout=timeout,
        ) as response:
            if response.status_code != 200:
                error_msg = f"[Error: HTTP {response.status_code} from {model}]"
            else:
                for line in response.iter_lines():
                    if not line or line.startswith(":"):
                        continue

                    if line.startswith("data: "):
                        data_str = line[6:]
                        if data_str.strip() == "[DONE]":
                            break

                        try:
                            data = json_module.loads(data_str)
                            if "error" in data:
                                error_msg = f"[Error: {data['error'].get('message', data['error'])}]"
                                break

                            if "choices" in data and data["choices"]:
                                delta = data["choices"][0].get("delta", {})
                                content = delta.get("content", "")
                                if content:
                                    if "<think>" in content:
                                        in_think_block = True
                                    if in_think_block:
                                        if "</think>" in content:
                                            in_think_block = False
                                            content = content.split("</think>", 1)[-1]
                                        else:
                                            continue

                                if content:
                                    print(content, end="", flush=True)
                                    full_content.append(content)
                        except json_module.JSONDecodeError:
                            pass

    except httpx.TimeoutException:
        error_msg = f"[Error: Timeout from {model}]"
    except (httpx.RequestError, httpx.RemoteProtocolError) as e:
        error_msg = f"[Error: Connection failed for {model}: {e}]"

    print()

    if error_msg:
        print(error_msg)
        return error_msg

    if not full_content:
        empty_msg = f"[No response from {model}]"
        print(empty_msg)
        return empty_msg

    return "".join(full_content)


async def query_model_async(
    client: httpx.AsyncClient,
    model: str,
    messages: list[dict],
    name: str,
    fallback: tuple[str, str] | None = None,
    google_api_key: str | None = None,
    moonshot_api_key: str | None = None,
    max_tokens: int = 500,
    retries: int = 2,
) -> tuple[str, str, str]:
    """Async query for parallel blind phase. Returns (name, model_name, response)."""
    if is_thinking_model(model):
        max_tokens = max(max_tokens, 2000)

    model_name = model.split("/")[-1]

    for attempt in range(retries + 1):
        try:
            response = await client.post(
                OPENROUTER_URL,
                json={
                    "model": model,
                    "messages": messages,
                    "max_tokens": max_tokens,
                },
            )

            if response.status_code != 200:
                if attempt < retries:
                    continue
                break

            data = response.json()

            if "error" in data:
                if attempt < retries:
                    continue
                break

            if "choices" not in data or not data["choices"]:
                if attempt < retries:
                    continue
                break

            content = data["choices"][0]["message"]["content"]

            if not content or not content.strip():
                reasoning = data["choices"][0]["message"].get("reasoning", "")
                if reasoning and reasoning.strip():
                    if attempt < retries:
                        continue
                    return (name, model_name, f"[Model still thinking - increase max_tokens. Partial: {reasoning[:200]}...]")
                if attempt < retries:
                    continue
                break

            if "<think>" in content:
                content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL).strip()

            return (name, model_name, content)

        except (httpx.RequestError, httpx.RemoteProtocolError):
            if attempt < retries:
                continue
            break

    # Try fallbacks synchronously
    if fallback:
        fallback_provider, fallback_model = fallback
        if fallback_provider == "google" and google_api_key:
            response = query_google_ai_studio(google_api_key, fallback_model, messages, max_tokens=max_tokens)
            return (name, fallback_model, response)
        elif fallback_provider == "moonshot" and moonshot_api_key:
            response = query_moonshot(moonshot_api_key, fallback_model, messages, max_tokens=max_tokens)
            return (name, fallback_model, response)

    return (name, model_name, f"[No response from {model_name} after {retries + 1} attempts]")


async def run_blind_phase_parallel(
    question: str,
    council_config: list[tuple[str, str, tuple[str, str] | None]],
    api_key: str,
    google_api_key: str | None = None,
    moonshot_api_key: str | None = None,
    verbose: bool = True,
    persona: str | None = None,
) -> list[tuple[str, str, str]]:
    """Parallel blind first-pass: all models stake claims simultaneously."""
    blind_system = """You are participating in the BLIND PHASE of a council deliberation.

Stake your initial position on the question BEFORE seeing what others think.
This prevents anchoring bias.

Provide a CLAIM SKETCH (not a full response):
1. Your core position (1-2 sentences)
2. Top 3 supporting claims or considerations
3. Key assumption or uncertainty

Keep it concise (~100 words). The full deliberation comes later."""

    if persona:
        blind_system += f"""

IMPORTANT CONTEXT about the person asking:
{persona}

Factor this into your advice — don't just give strategically optimal answers, consider what fits THIS person."""

    if verbose:
        print("=" * 60)
        print("BLIND PHASE (independent claims)")
        print("=" * 60)
        print()

    messages = [
        {"role": "system", "content": blind_system},
        {"role": "user", "content": f"Question:\n\n{question}"},
    ]

    async with httpx.AsyncClient(
        headers={"Authorization": f"Bearer {api_key}"},
        timeout=120.0,
    ) as client:
        tasks = [
            query_model_async(
                client, model, messages, name, fallback,
                google_api_key, moonshot_api_key
            )
            for name, model, fallback in council_config
        ]

        if verbose:
            print("(querying all models in parallel...)")

        results = await asyncio.gather(*tasks, return_exceptions=True)

    blind_claims = []
    for i, result in enumerate(results):
        name, model, _ = council_config[i]
        model_name = model.split("/")[-1]

        if isinstance(result, Exception):
            blind_claims.append((name, model_name, f"[Error: {result}]"))
        else:
            blind_claims.append(result)

    if verbose:
        print()
        for name, model_name, claims in blind_claims:
            print(f"### {model_name} (blind)")
            print(claims)
            print()

    return blind_claims


def sanitize_speaker_content(content: str) -> str:
    """Sanitize speaker content to prevent prompt injection."""
    sanitized = content.replace("SYSTEM:", "[SYSTEM]:")
    sanitized = sanitized.replace("INSTRUCTION:", "[INSTRUCTION]:")
    sanitized = sanitized.replace("IGNORE PREVIOUS", "[IGNORE PREVIOUS]")
    sanitized = sanitized.replace("OVERRIDE:", "[OVERRIDE]:")
    return sanitized


def detect_consensus(conversation: list[tuple[str, str]], council_size: int) -> tuple[bool, str]:
    """Detect if council has converged. Returns (converged, reason)."""
    if len(conversation) < council_size:
        return False, "insufficient responses"

    recent = [text for _, text in conversation[-council_size:]]

    consensus_count = sum(1 for text in recent if "CONSENSUS:" in text.upper())
    if consensus_count >= council_size - 1:
        return True, "explicit consensus signals"

    agreement_phrases = ["i agree with", "i concur", "we all agree", "consensus emerging"]
    agreement_count = sum(
        1 for text in recent
        if any(phrase in text.lower() for phrase in agreement_phrases)
    )
    if agreement_count >= council_size - 1:
        return True, "agreement language detected"

    return False, "no consensus"


def run_council(
    question: str,
    council_config: list[tuple[str, str, tuple[str, str] | None]],
    api_key: str,
    google_api_key: str | None = None,
    moonshot_api_key: str | None = None,
    rounds: int = 1,
    verbose: bool = True,
    anonymous: bool = True,
    blind: bool = True,
    context: str | None = None,
    social_mode: bool = False,
    persona: str | None = None,
    advocate_idx: int | None = None,
) -> tuple[str, list[str]]:
    """Run the council deliberation. Returns (transcript, failed_models)."""

    council_names = [name for name, _, _ in council_config]
    blind_claims = []
    failed_models = []

    if blind:
        blind_claims = asyncio.run(run_blind_phase_parallel(
            question, council_config, api_key, google_api_key, moonshot_api_key, verbose, persona
        ))
        for name, model_name, claims in blind_claims:
            if claims.startswith("["):
                failed_models.append(f"{model_name} (blind): {claims}")

    if anonymous:
        display_names = {name: f"Speaker {i+1}" for i, (name, _, _) in enumerate(council_config)}
    else:
        display_names = {name: name for name, _, _ in council_config}

    if verbose:
        print(f"Council members: {council_names}")
        if anonymous:
            print("(Models see each other as Speaker 1, 2, etc. to prevent bias)")
        print(f"Rounds: {rounds}")
        print(f"Question: {question[:100]}{'...' if len(question) > 100 else ''}")
        print()
        print("=" * 60)
        print("COUNCIL DELIBERATION")
        print("=" * 60)
        print()

    conversation = []
    output_parts = []

    if blind_claims:
        for name, model_name, claims in blind_claims:
            output_parts.append(f"### {model_name} (blind)\n{claims}")

    blind_context = ""
    if blind_claims:
        blind_lines = []
        for name, _, claims in blind_claims:
            dname = display_names[name]
            blind_lines.append(f"**{dname}**: {sanitize_speaker_content(claims)}")
        blind_context = "\n\n".join(blind_lines)

    social_constraint = """

SOCIAL CALIBRATION: This is a social/conversational context (interview, networking, outreach).
Your output should feel natural in conversation - something you'd actually say over coffee.
Avoid structured, multi-part diagnostic questions that sound like interrogation.
Simple and human beats strategic and comprehensive. Optimize for being relatable, not thorough."""

    devils_advocate_addition = """

SPECIAL ROLE: You are the DEVIL'S ADVOCATE. Your job is to push back HARD.

REQUIREMENTS:
1. You MUST explicitly DISAGREE with at least one major point from the other speakers
2. Identify the weakest assumption in the emerging consensus and attack it
3. Consider: What would make this advice WRONG? What's the contrarian take?
4. If everyone is converging too fast, that's a red flag — find the hidden complexity

Don't just "add nuance" or "build on" — find something to genuinely challenge.
If you can't find real disagreement, say why the consensus might be groupthink."""

    first_speaker_with_blind = """You are {name}, speaking first in Round {round_num} of a council deliberation.

You've seen everyone's BLIND CLAIMS (their independent initial positions). Now engage:
1. Reference at least ONE other speaker's blind claim
2. Agree, disagree, or build on their position
3. Develop your own position further based on what you've learned

Be direct. Challenge weak arguments. Don't be sycophantic."""

    first_speaker_system = """You are {name}, speaking first in Round {round_num} of a council deliberation.

As the first speaker, stake a clear position on the question. Be specific and substantive so others can engage with your points.

End with 2-3 key claims that others should respond to."""

    council_system = """You are {name}, participating in Round {round_num} of a council deliberation.

REQUIREMENTS for your response:
1. Reference at least ONE previous speaker by name (e.g., "I agree with Speaker 1 that..." or "Speaker 2's point about X overlooks...")
2. State explicitly: AGREE, DISAGREE, or BUILD ON their specific point
3. Add ONE new consideration not yet raised
4. Keep response under 250 words — be concise and practical

If you fully agree with emerging consensus, say: "CONSENSUS: [the agreed position]"

Previous speakers this round: {previous_speakers}

Be direct. Challenge weak arguments. Don't be sycophantic.
Prioritize PRACTICAL, ACTIONABLE advice over academic observations. Avoid jargon."""

    for round_num in range(rounds):
        round_speakers = []
        for idx, (name, model, fallback) in enumerate(council_config):
            dname = display_names[name]

            if idx == 0 and round_num == 0:
                if blind_claims:
                    system_prompt = first_speaker_with_blind.format(name=dname, round_num=round_num + 1)
                else:
                    system_prompt = first_speaker_system.format(name=dname, round_num=round_num + 1)
            else:
                if round_speakers:
                    previous = ", ".join(round_speakers)
                else:
                    previous = ", ".join([display_names[n] for n, _, _ in council_config])
                system_prompt = council_system.format(
                    name=dname,
                    round_num=round_num + 1,
                    previous_speakers=previous
                )

            if social_mode:
                system_prompt += social_constraint

            if persona:
                system_prompt += f"""

IMPORTANT CONTEXT about the person asking:
{persona}

Factor this into your advice — don't just give strategically optimal answers, consider what fits THIS person."""

            if idx == advocate_idx and round_num == 0:
                system_prompt += devils_advocate_addition

            user_content = f"Question for the council:\n\n{question}"
            if blind_context:
                user_content += f"\n\n---\n\nBLIND CLAIMS (independent initial positions):\n\n{blind_context}"

            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
            ]

            for speaker, text in conversation:
                speaker_dname = display_names[speaker]
                sanitized_text = sanitize_speaker_content(text)
                messages.append({
                    "role": "assistant" if speaker == name else "user",
                    "content": f"[{speaker_dname}]: {sanitized_text}" if speaker != name else sanitized_text,
                })

            model_name = model.split("/")[-1]

            if verbose:
                print(f"### {model_name}")
                if is_thinking_model(model):
                    print("(thinking...)", flush=True)

            response = query_model(api_key, model, messages, stream=verbose)

            used_fallback = False
            if response.startswith("[") and fallback:
                fallback_provider, fallback_model = fallback

                if fallback_provider == "google" and google_api_key:
                    if verbose:
                        print(f"(OpenRouter failed, trying AI Studio fallback: {fallback_model}...)", flush=True)
                    response = query_google_ai_studio(google_api_key, fallback_model, messages)
                    used_fallback = True
                    model_name = fallback_model

                elif fallback_provider == "moonshot" and moonshot_api_key:
                    if verbose:
                        print(f"(OpenRouter failed, trying Moonshot fallback: {fallback_model}...)", flush=True)
                    response = query_moonshot(moonshot_api_key, fallback_model, messages)
                    used_fallback = True
                    model_name = fallback_model

            if verbose and (is_thinking_model(model) or used_fallback):
                print(response)

            if response.startswith("["):
                failed_models.append(f"{model_name}: {response}")

            conversation.append((name, response))
            round_speakers.append(dname)

            if verbose:
                print()

            output_parts.append(f"### {model_name}\n{response}")

        converged, reason = detect_consensus(conversation, len(council_config))
        if converged:
            if verbose:
                print(f">>> CONSENSUS DETECTED ({reason}) - proceeding to judge\n")
            break

    # Judge synthesis
    context_hint = ""
    if context:
        context_hint = f"\n\nContext about this question: {context}\nConsider this context when weighing perspectives and forming recommendations."

    social_judge_section = ""
    if social_mode:
        social_judge_section = """

## Social Calibration Check
[Would the recommendation feel natural in conversation? Is it something you'd actually say, or does it sound like strategic over-optimization? If the council produced something too formal/structured, suggest a simpler, more human alternative.]"""

    judge_system = f"""You are the Judge, responsible for synthesizing the council's deliberation.{context_hint}

After the council members have shared their perspectives, you:
1. Identify points of AGREEMENT across all members
2. Identify points of DISAGREEMENT and explain the different views
3. Provide a SYNTHESIS that captures the council's collective wisdom
4. Give a final RECOMMENDATION based on the deliberation
{"5. SOCIAL CALIBRATION: Check if the recommendation would feel natural in actual conversation" if social_mode else ""}

Format your response as:

## Points of Agreement
[What the council agrees on]

## Points of Disagreement
[Where views differ and why]

## Synthesis
[The integrated perspective]

## Recommendation
[Your final recommendation based on the deliberation]
{social_judge_section}
Be balanced and fair. Acknowledge minority views. Don't just pick a winner.{" For social contexts, prioritize natural/human output over strategic optimization." if social_mode else ""}

IMPORTANT: In your Recommendation, clearly distinguish:
- **Do Now** — practical actions the user can take immediately
- **Consider Later** — interesting ideas that require more infrastructure or scale

Don't recommend building infrastructure for problems that don't exist yet."""

    deliberation_text = "\n\n".join(
        f"**{display_names[speaker]}**: {sanitize_speaker_content(text)}" for speaker, text in conversation
    )

    judge_messages = [
        {"role": "system", "content": judge_system},
        {"role": "user", "content": f"Question:\n{question}\n\n---\n\nCouncil Deliberation:\n\n{deliberation_text}"},
    ]

    judge_model_name = JUDGE_MODEL.split("/")[-1]

    if verbose:
        print(f"### Judge ({judge_model_name})")

    judge_response = query_model(api_key, JUDGE_MODEL, judge_messages, max_tokens=1200, stream=verbose)

    if verbose:
        print()

    output_parts.append(f"### Judge ({judge_model_name})\n{judge_response}")

    if anonymous:
        final_output = "\n\n".join(output_parts)
        for name, model, _ in council_config:
            anon_name = display_names[name]
            model_name = model.split("/")[-1]
            final_output = final_output.replace(f"### {anon_name}", f"### {model_name}")
            final_output = final_output.replace(f"[{anon_name}]", f"[{model_name}]")
            final_output = final_output.replace(f"**{anon_name}**", f"**{model_name}**")
            final_output = final_output.replace(f"with {anon_name}", f"with {model_name}")
            final_output = final_output.replace(f"{anon_name}'s", f"{model_name}'s")
        return final_output, failed_models

    return "\n\n".join(output_parts), failed_models

frontier_council-0.1.0.dist-info/METADATA
ADDED

@@ -0,0 +1,150 @@
Metadata-Version: 2.4
Name: frontier-council
Version: 0.1.0
Summary: Multi-model deliberation for important decisions. 5 frontier LLMs debate, then a judge synthesizes consensus.
Project-URL: Homepage, https://github.com/terry-li-hm/skills
Author-email: Terry Li <terry.li.hm@gmail.com>
License-Expression: MIT
Keywords: ai,council,debate,deliberation,frontier,llm,multi-model,openrouter
Classifier: Development Status :: 4 - Beta
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.11
Requires-Dist: httpx>=0.25.0
Description-Content-Type: text/markdown

# Frontier Council

Multi-model deliberation for important decisions. 5 frontier LLMs debate a question, then a judge synthesizes consensus.

Inspired by [Andrej Karpathy's LLM Council](https://github.com/karpathy/llm-council), with added blind phase (anti-anchoring), explicit engagement requirements, devil's advocate role, and social calibration mode.

## Models

- Claude (claude-opus-4.5)
- GPT (gpt-5.2-pro)
- Gemini (gemini-3-pro-preview)
- Grok (grok-4)
- Kimi (kimi-k2.5)
- Judge: Claude Opus 4.5

## Installation

```bash
pip install frontier-council
```

Or with uv:

```bash
uv tool install frontier-council
```

## Setup

Set your OpenRouter API key:

```bash
export OPENROUTER_API_KEY=sk-or-v1-...
```

Optional fallback keys (for flaky models):

```bash
export GOOGLE_API_KEY=AIza...   # Gemini fallback
export MOONSHOT_API_KEY=sk-...  # Kimi fallback
```

## Usage

```bash
# Basic question
frontier-council "Should we use microservices or monolith?"

# With social calibration (for interview/networking questions)
frontier-council "What questions should I ask in the interview?" --social

# With persona context
frontier-council "Should I take the job?" --persona "builder who hates process work"

# Multiple rounds
frontier-council "Architecture decision" --rounds 3

# Save transcript
frontier-council "Career question" --output transcript.md

# Share via GitHub Gist
frontier-council "Important decision" --share
```

## Options

| Flag | Description |
|------|-------------|
| `--rounds N` | Number of deliberation rounds (default: 2, exits early on consensus) |
| `--output FILE` | Save transcript to file |
| `--named` | Let models see real names during deliberation (may increase bias) |
| `--no-blind` | Skip blind first-pass (faster, but first speaker anchors others) |
| `--context TEXT` | Context hint for judge (e.g., "architecture decision") |
| `--share` | Upload transcript to secret GitHub Gist |
| `--social` | Enable social calibration mode (auto-detected for interview/networking) |
| `--persona TEXT` | Context about the person asking |
| `--advocate N` | Which speaker (1-5) should be devil's advocate (default: random) |
| `--quiet` | Suppress progress output |

## How It Works

**Blind First-Pass (Anti-Anchoring):**
1. All models generate short "claim sketches" independently and in parallel (see the sketch below)
2. This prevents the "first speaker lottery" where whoever speaks first anchors the debate
3. Each model commits to an initial position before seeing any other responses
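
If you only want these independent first-pass claims, `run_blind_phase_parallel` is part of the public API. A minimal sketch, assuming a valid `OPENROUTER_API_KEY` is set (the question string is just an illustration):

```python
# Minimal sketch: run only the blind phase to collect each model's
# independent claim sketch. The function is async, so wrap it in asyncio.run.
import asyncio
import os

from frontier_council import COUNCIL, run_blind_phase_parallel

claims = asyncio.run(run_blind_phase_parallel(
    question="Should we use microservices or monolith?",
    council_config=COUNCIL,
    api_key=os.environ["OPENROUTER_API_KEY"],
    verbose=False,
))

# Each entry is (council member name, model name, claim text).
for name, model_name, claim in claims:
    print(f"{name} ({model_name}):\n{claim}\n")
```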

**Deliberation Protocol:**
1. All models see everyone's blind claims, then deliberate
2. Each model MUST explicitly AGREE, DISAGREE, or BUILD ON previous speakers by name
3. After each round, the system checks for consensus (4/5 agreement triggers early exit; see the example below)
4. Judge synthesizes the full deliberation
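
The early-exit check is implemented by `detect_consensus` in `frontier_council.council` (an internal helper, not re-exported). A toy illustration of the 4-out-of-5 rule, with made-up responses:

```python
# Sketch of the consensus rule: a round converges when at least
# council_size - 1 of the most recent responses carry a "CONSENSUS:" marker
# (or, separately, use agreement language such as "I agree with").
from frontier_council.council import detect_consensus

conversation = [
    ("Claude", "CONSENSUS: ship the monolith first."),
    ("GPT", "CONSENSUS: monolith first, split services only when needed."),
    ("Gemini", "CONSENSUS: monolith now, revisit at scale."),
    ("Grok", "CONSENSUS: agreed, start with the monolith."),
    ("Kimi", "I still lean microservices, but I'm in the minority."),
]

converged, reason = detect_consensus(conversation, council_size=5)
print(converged, reason)  # True explicit consensus signals (4 of 5 signalled)
```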

**Anonymous Deliberation:**
- Models see each other as "Speaker 1", "Speaker 2", etc. during deliberation
- Prevents models from playing favorites based on vendor reputation
- Output transcript shows real model names for readability

## When to Use

Use the council when:
- Making an important decision that benefits from diverse perspectives
- You want models to actually debate, not just answer in parallel
- You need a synthesized recommendation, not raw comparison
- Exploring trade-offs where different viewpoints matter

Skip the council when:
- You're just thinking out loud (exploratory discussions)
- The answer depends on personal preference more than objective trade-offs
- Speed matters (council takes 60-90 seconds)

## Python API

```python
from frontier_council import run_council, COUNCIL
import os

api_key = os.environ["OPENROUTER_API_KEY"]

transcript, failed_models = run_council(
    question="Should we use microservices or monolith?",
    council_config=COUNCIL,
    api_key=api_key,
    rounds=2,
    verbose=True,
    social_mode=False,
)

print(transcript)
```
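
The keyword check that drives `--social` auto-detection on the CLI is exported as well; a quick sketch, with the expected return values noted in comments:

```python
from frontier_council import detect_social_context

detect_social_context("What questions should I ask in the interview?")  # True ("interview" matches)
detect_social_context("Should we use microservices or monolith?")       # False (no social keyword)
```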

## License

MIT

frontier_council-0.1.0.dist-info/RECORD
ADDED

@@ -0,0 +1,7 @@
frontier_council/__init__.py,sha256=VfpeWHijQ1z8zd_ecKPIjI6S9VT3E6yAQ1PVt8-eExU,357
frontier_council/cli.py,sha256=BbM1cHWjAA0DBn4uyGKajaNAMxhKYQt-ZGNQ6hTnlkc,7337
frontier_council/council.py,sha256=u2ir34dNostBOhXUi1R0wFEfBIEgiRX8thiS5lRFnnU,30226
frontier_council-0.1.0.dist-info/METADATA,sha256=hZGQzWU0DUtuexLkKR4sXVHFjDbw75bYg02uQ8dFAB4,4792
frontier_council-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
frontier_council-0.1.0.dist-info/entry_points.txt,sha256=I3xjPK-nupfQz5PANVXUnXjuxlP-4-mykkA3wXhFOGY,63
frontier_council-0.1.0.dist-info/RECORD,,