frontier_council-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in those registries.
frontier_council/__init__.py
@@ -0,0 +1,19 @@
+ """Frontier Council - Multi-model deliberation for important decisions."""
+
+ __version__ = "0.1.0"
+
+ from .council import (
+     run_council,
+     run_blind_phase_parallel,
+     detect_social_context,
+     COUNCIL,
+     JUDGE_MODEL,
+ )
+
+ __all__ = [
+     "run_council",
+     "run_blind_phase_parallel",
+     "detect_social_context",
+     "COUNCIL",
+     "JUDGE_MODEL",
+ ]
frontier_council/cli.py
@@ -0,0 +1,214 @@
+ """CLI entry point for frontier-council."""
+
+ import argparse
+ import json
+ import os
+ import random
+ import subprocess
+ import sys
+ from datetime import datetime
+ from pathlib import Path
+
+ from .council import (
+     COUNCIL,
+     detect_social_context,
+     run_council,
+ )
+
+
+ def main():
+     parser = argparse.ArgumentParser(
+         description="LLM Council - Multi-model deliberation for important decisions",
+         formatter_class=argparse.RawDescriptionHelpFormatter,
+         epilog="""
+ Examples:
+   frontier-council "Should we use microservices or monolith?"
+   frontier-council "What questions should I ask?" --social
+   frontier-council "Career decision" --persona "builder who hates process work"
+   frontier-council "Architecture choice" --rounds 3 --output transcript.md
+ """,
+     )
+     parser.add_argument("question", help="The question for the council to deliberate")
+     parser.add_argument(
+         "--rounds",
+         type=int,
+         default=2,
+         help="Number of deliberation rounds (default: 2, exits early on consensus)",
+     )
+     parser.add_argument(
+         "--quiet",
+         action="store_true",
+         help="Suppress progress output",
+     )
+     parser.add_argument(
+         "--output", "-o",
+         help="Save transcript to file",
+     )
+     parser.add_argument(
+         "--named",
+         action="store_true",
+         help="Show real model names instead of anonymous Speaker 1, 2, etc.",
+     )
+     parser.add_argument(
+         "--no-blind",
+         action="store_true",
+         help="Skip blind first-pass (faster, but more anchoring bias)",
+     )
+     parser.add_argument(
+         "--context", "-c",
+         help="Context hint for the judge (e.g., 'architecture decision', 'ethics question')",
+     )
+     parser.add_argument(
+         "--share",
+         action="store_true",
+         help="Upload transcript to secret GitHub Gist and print URL",
+     )
+     parser.add_argument(
+         "--social",
+         action="store_true",
+         help="Enable social calibration mode (for interview questions, outreach, networking)",
+     )
+     parser.add_argument(
+         "--persona", "-p",
+         help="Context about the person asking (e.g., 'builder who hates process work')",
+     )
+     parser.add_argument(
+         "--advocate",
+         type=int,
+         choices=[1, 2, 3, 4, 5],
+         help="Which speaker (1-5) should be devil's advocate (default: random)",
+     )
+     args = parser.parse_args()
+
+     # Auto-detect social context if not explicitly set
+     social_mode = args.social or detect_social_context(args.question)
+     if social_mode and not args.social and not args.quiet:
+         print("(Auto-detected social context - enabling social calibration mode)")
+         print()
+
+     # Get API keys
+     api_key = os.environ.get("OPENROUTER_API_KEY")
+     if not api_key:
+         print("Error: OPENROUTER_API_KEY environment variable not set", file=sys.stderr)
+         sys.exit(1)
+
+     google_api_key = os.environ.get("GOOGLE_API_KEY")
+     moonshot_api_key = os.environ.get("MOONSHOT_API_KEY")
+
+     use_blind = not args.no_blind
+
+     if not args.quiet:
+         mode_parts = []
+         mode_parts.append("named" if args.named else "anonymous")
+         mode_parts.append("blind first-pass" if use_blind else "no blind phase")
+         if social_mode:
+             mode_parts.append("social calibration")
+         print(f"Running LLM Council ({', '.join(mode_parts)})...")
+         fallbacks = []
+         if google_api_key:
+             fallbacks.append("Gemini→AI Studio")
+         if moonshot_api_key:
+             fallbacks.append("Kimi→Moonshot")
+         if fallbacks:
+             print(f"(Fallbacks enabled: {', '.join(fallbacks)})")
+         print()
+
+     try:
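+         # --advocate is 1-based on the CLI; convert to a 0-based COUNCIL index (random member if unset)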
+         advocate_idx = (args.advocate - 1) if args.advocate else random.randint(0, len(COUNCIL) - 1)
+
+         if not args.quiet and args.persona:
+             print(f"(Persona context: {args.persona})")
+             print()
+         if not args.quiet:
+             advocate_name = COUNCIL[advocate_idx][0]
+             print(f"(Devil's advocate: {advocate_name})")
+             print()
+
+         transcript, failed_models = run_council(
+             question=args.question,
+             council_config=COUNCIL,
+             api_key=api_key,
+             google_api_key=google_api_key,
+             moonshot_api_key=moonshot_api_key,
+             rounds=args.rounds,
+             verbose=not args.quiet,
+             anonymous=not args.named,
+             blind=use_blind,
+             context=args.context,
+             social_mode=social_mode,
+             persona=args.persona,
+             advocate_idx=advocate_idx,
+         )
+
+         # Print failure summary
+         if failed_models and not args.quiet:
+             print()
+             print("=" * 60)
+             print("⚠️ MODEL FAILURES")
+             print("=" * 60)
+             for failure in failed_models:
+                 print(f"  • {failure}")
+             working_count = len(COUNCIL) - len(set(f.split(":")[0].split(" (")[0] for f in failed_models))
+             print(f"\nCouncil ran with {working_count}/{len(COUNCIL)} models")
+             print("=" * 60)
+             print()
+
+         # Save transcript
+         if args.output:
+             Path(args.output).write_text(transcript)
+             if not args.quiet:
+                 print(f"Transcript saved to: {args.output}")
+
+         # Share via gist
+         gist_url = None
+         if args.share:
+             try:
+                 import tempfile
+                 with tempfile.NamedTemporaryFile(
+                     mode='w', suffix='.md', prefix='council-', delete=False
+                 ) as f:
+                     f.write("# LLM Council Deliberation\n\n")
+                     f.write(f"**Question:** {args.question}\n\n")
+                     if args.context:
+                         f.write(f"**Context:** {args.context}\n\n")
+                     f.write(f"**Date:** {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n---\n\n")
+                     f.write(transcript)
+                     temp_path = f.name
+
+                 result = subprocess.run(
+                     ["gh", "gist", "create", temp_path, "--desc", f"LLM Council: {args.question[:50]}"],
+                     capture_output=True, text=True
+                 )
+                 os.unlink(temp_path)
+
+                 if result.returncode == 0:
+                     gist_url = result.stdout.strip()
+                     print(f"\n🔗 Shared: {gist_url}")
+                 else:
+                     print(f"Gist creation failed: {result.stderr}", file=sys.stderr)
+             except FileNotFoundError:
+                 print("Error: 'gh' CLI not found. Install with: brew install gh", file=sys.stderr)
+
+         # Log to history in the user's home directory (the install location may not be writable)
+         history_file = Path.home() / ".council_history.jsonl"
+         log_entry = {
+             "timestamp": datetime.now().isoformat(),
+             "question": args.question[:200],
+             "gist": gist_url,
+             "context": args.context,
+             "rounds": args.rounds,
+             "blind": use_blind,
+             "models": [name for name, _, _ in COUNCIL],
+         }
+         with open(history_file, "a") as f:
+             f.write(json.dumps(log_entry) + "\n")
+
+     except Exception as e:
+         print(f"Error: {e}", file=sys.stderr)
+         import traceback
+         traceback.print_exc()
+         sys.exit(1)
+
+
+ if __name__ == "__main__":
+     main()
frontier_council/council.py
@@ -0,0 +1,830 @@
+ """Core council deliberation logic."""
+
+ import asyncio
+ import httpx
+ import json
+ import re
+ from pathlib import Path
+
+ OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
+ GOOGLE_AI_STUDIO_URL = "https://generativelanguage.googleapis.com/v1beta/models"
+ MOONSHOT_URL = "https://api.moonshot.cn/v1/chat/completions"
+
+ # Model configurations (all via OpenRouter, with fallbacks where available)
+ # Format: (name, openrouter_model, fallback) - fallback is (provider, model) or None
+ # Providers: "google" = AI Studio, "moonshot" = Moonshot API
+ COUNCIL = [
+     ("Claude", "anthropic/claude-opus-4.5", None),
+     ("GPT", "openai/gpt-5.2-pro", None),
+     ("Gemini", "google/gemini-3-pro-preview", ("google", "gemini-2.5-pro")),
+     ("Grok", "x-ai/grok-4", None),
+     ("Kimi", "moonshotai/kimi-k2.5", ("moonshot", "kimi-k2.5")),
+ ]
+
+ JUDGE_MODEL = "anthropic/claude-opus-4.5"
+
+ # Keywords that suggest social/conversational context (auto-detect)
+ SOCIAL_KEYWORDS = [
+     "interview", "ask him", "ask her", "ask them", "question to ask",
+     "networking", "outreach", "message", "email", "linkedin",
+     "coffee chat", "informational", "reach out", "follow up",
+     "what should i say", "how should i respond", "conversation",
+ ]
+
+ # Thinking models don't stream well - use non-streaming for these
+ THINKING_MODEL_SUFFIXES = {
+     "gemini-3-pro-preview",
+     "kimi-k2.5",
+     "deepseek-r1",
+     "o1-preview", "o1-mini", "o1",
+     "o3-preview", "o3-mini", "o3",
+ }
+
+
+ def is_thinking_model(model: str) -> bool:
+     """Check if model is a thinking model that doesn't stream well."""
+     model_name = model.split("/")[-1].lower()
+     return model_name in THINKING_MODEL_SUFFIXES
+
+
+ def detect_social_context(question: str) -> bool:
+     """Auto-detect if the question is about social/conversational context."""
+     question_lower = question.lower()
+     return any(keyword in question_lower for keyword in SOCIAL_KEYWORDS)
+
+
+ def query_model(
+     api_key: str,
+     model: str,
+     messages: list[dict],
+     max_tokens: int = 1500,
+     timeout: float = 120.0,
+     stream: bool = False,
+     retries: int = 2,
+ ) -> str:
+     """Query a model via OpenRouter with retry logic for flaky models."""
+     if is_thinking_model(model):
+         max_tokens = max(max_tokens, 4000)
+         timeout = max(timeout, 180.0)
+
+     if stream and not is_thinking_model(model):
+         result = query_model_streaming(api_key, model, messages, max_tokens, timeout)
+         if not result.startswith("["):
+             return result
+         print("(Streaming failed, retrying without streaming...)", flush=True)
+
+     for attempt in range(retries + 1):
+         try:
+             response = httpx.post(
+                 OPENROUTER_URL,
+                 headers={"Authorization": f"Bearer {api_key}"},
+                 json={
+                     "model": model,
+                     "messages": messages,
+                     "max_tokens": max_tokens,
+                 },
+                 timeout=timeout,
+             )
+         except (httpx.RequestError, httpx.RemoteProtocolError) as e:
+             if attempt < retries:
+                 continue
+             return f"[Error: Connection failed for {model}: {e}]"
+
+         if response.status_code != 200:
+             if attempt < retries:
+                 continue
+             return f"[Error: HTTP {response.status_code} from {model}]"
+
+         data = response.json()
+
+         if "error" in data:
+             if attempt < retries:
+                 continue
+             return f"[Error: {data['error'].get('message', data['error'])}]"
+
+         if "choices" not in data or not data["choices"]:
+             if attempt < retries:
+                 continue
+             return f"[Error: No response from {model}]"
+
+         content = data["choices"][0]["message"]["content"]
+
+         if not content or not content.strip():
+             reasoning = data["choices"][0]["message"].get("reasoning", "")
+             if reasoning and reasoning.strip():
+                 if attempt < retries:
+                     continue
+                 return f"[Model still thinking - needs more tokens. Partial reasoning: {reasoning[:150]}...]"
+             if attempt < retries:
+                 continue
+             return f"[No response from {model} after {retries + 1} attempts]"
+
+         if "<think>" in content:
+             content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL).strip()
+
+         return content
+
+     return f"[Error: Failed to get response from {model}]"
+
+
+ def query_google_ai_studio(
+     api_key: str,
+     model: str,
+     messages: list[dict],
+     max_tokens: int = 8192,
+     timeout: float = 120.0,
+     retries: int = 2,
+ ) -> str:
+     """Query Google AI Studio directly (fallback for Gemini models)."""
+     contents = []
+     system_instruction = None
+
+     for msg in messages:
+         role = msg["role"]
+         content = msg["content"]
+
+         if role == "system":
+             system_instruction = content
+         elif role == "user":
+             contents.append({"role": "user", "parts": [{"text": content}]})
+         elif role == "assistant":
+             contents.append({"role": "model", "parts": [{"text": content}]})
+
+     body = {
+         "contents": contents,
+         "generationConfig": {
+             "maxOutputTokens": max_tokens,
+         }
+     }
+     if system_instruction:
+         body["systemInstruction"] = {"parts": [{"text": system_instruction}]}
+
+     url = f"{GOOGLE_AI_STUDIO_URL}/{model}:generateContent?key={api_key}"
+
+     for attempt in range(retries + 1):
+         try:
+             response = httpx.post(url, json=body, timeout=timeout)
+
+             if response.status_code != 200:
+                 if attempt < retries:
+                     continue
+                 return f"[Error: HTTP {response.status_code} from AI Studio {model}]"
+
+             data = response.json()
+
+             if "error" in data:
+                 if attempt < retries:
+                     continue
+                 return f"[Error: {data['error'].get('message', data['error'])}]"
+
+             candidates = data.get("candidates", [])
+             if not candidates:
+                 if attempt < retries:
+                     continue
+                 return f"[Error: No candidates from AI Studio {model}]"
+
+             parts = candidates[0].get("content", {}).get("parts", [])
+             if not parts:
+                 if attempt < retries:
+                     continue
+                 return f"[Error: No content from AI Studio {model}]"
+
+             content = parts[0].get("text", "")
+             if not content.strip():
+                 if attempt < retries:
+                     continue
+                 return f"[No response from AI Studio {model} after {retries + 1} attempts]"
+
+             return content
+
+         except httpx.TimeoutException:
+             if attempt < retries:
+                 continue
+             return f"[Error: Timeout from AI Studio {model}]"
+         except httpx.RequestError as e:
+             if attempt < retries:
+                 continue
+             return f"[Error: Request failed for AI Studio {model}: {e}]"
+
+     return f"[Error: Failed to get response from AI Studio {model}]"
+
+
+ def query_moonshot(
+     api_key: str,
+     model: str,
+     messages: list[dict],
+     max_tokens: int = 8192,
+     timeout: float = 120.0,
+     retries: int = 2,
+ ) -> str:
+     """Query Moonshot API directly (fallback for Kimi models)."""
+     for attempt in range(retries + 1):
+         try:
+             response = httpx.post(
+                 MOONSHOT_URL,
+                 headers={"Authorization": f"Bearer {api_key}"},
+                 json={
+                     "model": model,
+                     "messages": messages,
+                     "max_tokens": max_tokens,
+                 },
+                 timeout=timeout,
+             )
+
+             if response.status_code != 200:
+                 if attempt < retries:
+                     continue
+                 return f"[Error: HTTP {response.status_code} from Moonshot {model}]"
+
+             data = response.json()
+
+             if "error" in data:
+                 if attempt < retries:
+                     continue
+                 return f"[Error: {data['error'].get('message', data['error'])}]"
+
+             if "choices" not in data or not data["choices"]:
+                 if attempt < retries:
+                     continue
+                 return f"[Error: No response from Moonshot {model}]"
+
+             content = data["choices"][0]["message"]["content"]
+
+             if not content or not content.strip():
+                 if attempt < retries:
+                     continue
+                 return f"[No response from Moonshot {model} after {retries + 1} attempts]"
+
+             return content
+
+         except httpx.TimeoutException:
+             if attempt < retries:
+                 continue
+             return f"[Error: Timeout from Moonshot {model}]"
+         except httpx.RequestError as e:
+             if attempt < retries:
+                 continue
+             return f"[Error: Request failed for Moonshot {model}: {e}]"
+
+     return f"[Error: Failed to get response from Moonshot {model}]"
+
+
+ def query_model_streaming(
+     api_key: str,
+     model: str,
+     messages: list[dict],
+     max_tokens: int = 1500,
+     timeout: float = 120.0,
+ ) -> str:
+     """Query a model with streaming output - prints tokens as they arrive."""
+     import json as json_module
+
+     full_content = []
+     in_think_block = False
+     error_msg = None
+
+     try:
+         with httpx.stream(
+             "POST",
+             OPENROUTER_URL,
+             headers={"Authorization": f"Bearer {api_key}"},
+             json={
+                 "model": model,
+                 "messages": messages,
+                 "max_tokens": max_tokens,
+                 "stream": True,
+             },
+             timeout=timeout,
+         ) as response:
+             if response.status_code != 200:
+                 error_msg = f"[Error: HTTP {response.status_code} from {model}]"
+             else:
+                 for line in response.iter_lines():
+                     if not line or line.startswith(":"):
+                         continue
+
+                     if line.startswith("data: "):
+                         data_str = line[6:]
+                         if data_str.strip() == "[DONE]":
+                             break
+
+                         try:
+                             data = json_module.loads(data_str)
+                             if "error" in data:
+                                 error_msg = f"[Error: {data['error'].get('message', data['error'])}]"
+                                 break
+
+                             if "choices" in data and data["choices"]:
+                                 delta = data["choices"][0].get("delta", {})
+                                 content = delta.get("content", "")
+                                 if content:
+ if content:
321
+ if "<think>" in content:
322
+ in_think_block = True
323
+ if in_think_block:
324
+ if "</think>" in content:
325
+ in_think_block = False
326
+ content = content.split("</think>", 1)[-1]
327
+ else:
328
+ continue
329
+
330
+ if content:
331
+ print(content, end="", flush=True)
332
+ full_content.append(content)
333
+ except json_module.JSONDecodeError:
334
+ pass
335
+
336
+ except httpx.TimeoutException:
337
+ error_msg = f"[Error: Timeout from {model}]"
338
+ except (httpx.RequestError, httpx.RemoteProtocolError) as e:
339
+ error_msg = f"[Error: Connection failed for {model}: {e}]"
340
+
341
+ print()
342
+
343
+ if error_msg:
344
+ print(error_msg)
345
+ return error_msg
346
+
347
+ if not full_content:
348
+ empty_msg = f"[No response from {model}]"
349
+ print(empty_msg)
350
+ return empty_msg
351
+
352
+ return "".join(full_content)
353
+
354
+
355
+ async def query_model_async(
356
+ client: httpx.AsyncClient,
357
+ model: str,
358
+ messages: list[dict],
359
+ name: str,
360
+ fallback: tuple[str, str] | None = None,
361
+ google_api_key: str | None = None,
362
+ moonshot_api_key: str | None = None,
363
+ max_tokens: int = 500,
364
+ retries: int = 2,
365
+ ) -> tuple[str, str, str]:
366
+ """Async query for parallel blind phase. Returns (name, model_name, response)."""
367
+ if is_thinking_model(model):
368
+ max_tokens = max(max_tokens, 2000)
369
+
370
+ model_name = model.split("/")[-1]
371
+
372
+ for attempt in range(retries + 1):
373
+ try:
374
+ response = await client.post(
375
+ OPENROUTER_URL,
376
+ json={
377
+ "model": model,
378
+ "messages": messages,
379
+ "max_tokens": max_tokens,
380
+ },
381
+ )
382
+
383
+ if response.status_code != 200:
384
+ if attempt < retries:
385
+ continue
386
+ break
387
+
388
+ data = response.json()
389
+
390
+ if "error" in data:
391
+ if attempt < retries:
392
+ continue
393
+ break
394
+
395
+ if "choices" not in data or not data["choices"]:
396
+ if attempt < retries:
397
+ continue
398
+ break
399
+
400
+ content = data["choices"][0]["message"]["content"]
401
+
402
+ if not content or not content.strip():
403
+ reasoning = data["choices"][0]["message"].get("reasoning", "")
404
+ if reasoning and reasoning.strip():
405
+ if attempt < retries:
406
+ continue
407
+ return (name, model_name, f"[Model still thinking - increase max_tokens. Partial: {reasoning[:200]}...]")
408
+ if attempt < retries:
409
+ continue
410
+ break
411
+
412
+ if "<think>" in content:
413
+ content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL).strip()
414
+
415
+ return (name, model_name, content)
416
+
417
+ except (httpx.RequestError, httpx.RemoteProtocolError):
418
+ if attempt < retries:
419
+ continue
420
+ break
421
+
422
+ # Try fallbacks synchronously
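+     # (these helper calls are synchronous and block the event loop; they only run after all async retries have failed)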
+     if fallback:
+         fallback_provider, fallback_model = fallback
+         if fallback_provider == "google" and google_api_key:
+             response = query_google_ai_studio(google_api_key, fallback_model, messages, max_tokens=max_tokens)
+             return (name, fallback_model, response)
+         elif fallback_provider == "moonshot" and moonshot_api_key:
+             response = query_moonshot(moonshot_api_key, fallback_model, messages, max_tokens=max_tokens)
+             return (name, fallback_model, response)
+
+     return (name, model_name, f"[No response from {model_name} after {retries + 1} attempts]")
+
+
+ async def run_blind_phase_parallel(
+     question: str,
+     council_config: list[tuple[str, str, tuple[str, str] | None]],
+     api_key: str,
+     google_api_key: str | None = None,
+     moonshot_api_key: str | None = None,
+     verbose: bool = True,
+     persona: str | None = None,
+ ) -> list[tuple[str, str, str]]:
+     """Parallel blind first-pass: all models stake claims simultaneously."""
+     blind_system = """You are participating in the BLIND PHASE of a council deliberation.
+
+ Stake your initial position on the question BEFORE seeing what others think.
+ This prevents anchoring bias.
+
+ Provide a CLAIM SKETCH (not a full response):
+ 1. Your core position (1-2 sentences)
+ 2. Top 3 supporting claims or considerations
+ 3. Key assumption or uncertainty
+
+ Keep it concise (~100 words). The full deliberation comes later."""
+
+     if persona:
+         blind_system += f"""
+
+ IMPORTANT CONTEXT about the person asking:
+ {persona}
+
+ Factor this into your advice — don't just give strategically optimal answers, consider what fits THIS person."""
+
+     if verbose:
+         print("=" * 60)
+         print("BLIND PHASE (independent claims)")
+         print("=" * 60)
+         print()
+
+     messages = [
+         {"role": "system", "content": blind_system},
+         {"role": "user", "content": f"Question:\n\n{question}"},
+     ]
+
+     async with httpx.AsyncClient(
+         headers={"Authorization": f"Bearer {api_key}"},
+         timeout=120.0,
+     ) as client:
+         tasks = [
+             query_model_async(
+                 client, model, messages, name, fallback,
+                 google_api_key, moonshot_api_key
+             )
+             for name, model, fallback in council_config
+         ]
+
+         if verbose:
+             print("(querying all models in parallel...)")
+
+         results = await asyncio.gather(*tasks, return_exceptions=True)
+
+     blind_claims = []
+     for i, result in enumerate(results):
+         name, model, _ = council_config[i]
+         model_name = model.split("/")[-1]
+
+         if isinstance(result, Exception):
+             blind_claims.append((name, model_name, f"[Error: {result}]"))
+         else:
+             blind_claims.append(result)
+
+     if verbose:
+         print()
+         for name, model_name, claims in blind_claims:
+             print(f"### {model_name} (blind)")
+             print(claims)
+             print()
+
+     return blind_claims
+
+
+ def sanitize_speaker_content(content: str) -> str:
+     """Sanitize speaker content to prevent prompt injection."""
+     sanitized = content.replace("SYSTEM:", "[SYSTEM]:")
+     sanitized = sanitized.replace("INSTRUCTION:", "[INSTRUCTION]:")
+     sanitized = sanitized.replace("IGNORE PREVIOUS", "[IGNORE PREVIOUS]")
+     sanitized = sanitized.replace("OVERRIDE:", "[OVERRIDE]:")
+     return sanitized
+
+
+ def detect_consensus(conversation: list[tuple[str, str]], council_size: int) -> tuple[bool, str]:
+     """Detect if council has converged. Returns (converged, reason)."""
+     if len(conversation) < council_size:
+         return False, "insufficient responses"
+
+     recent = [text for _, text in conversation[-council_size:]]
+
+     consensus_count = sum(1 for text in recent if "CONSENSUS:" in text.upper())
+     if consensus_count >= council_size - 1:
+         return True, "explicit consensus signals"
+
+     agreement_phrases = ["i agree with", "i concur", "we all agree", "consensus emerging"]
+     agreement_count = sum(
+         1 for text in recent
+         if any(phrase in text.lower() for phrase in agreement_phrases)
+     )
+     if agreement_count >= council_size - 1:
+         return True, "agreement language detected"
+
+     return False, "no consensus"
+
+
+ def run_council(
+     question: str,
+     council_config: list[tuple[str, str, tuple[str, str] | None]],
+     api_key: str,
+     google_api_key: str | None = None,
+     moonshot_api_key: str | None = None,
+     rounds: int = 1,
+     verbose: bool = True,
+     anonymous: bool = True,
+     blind: bool = True,
+     context: str | None = None,
+     social_mode: bool = False,
+     persona: str | None = None,
+     advocate_idx: int | None = None,
+ ) -> tuple[str, list[str]]:
+     """Run the council deliberation. Returns (transcript, failed_models)."""
+
+     council_names = [name for name, _, _ in council_config]
+     blind_claims = []
+     failed_models = []
+
+     if blind:
+         blind_claims = asyncio.run(run_blind_phase_parallel(
+             question, council_config, api_key, google_api_key, moonshot_api_key, verbose, persona
+         ))
+         for name, model_name, claims in blind_claims:
+             if claims.startswith("["):
+                 failed_models.append(f"{model_name} (blind): {claims}")
+
+     if anonymous:
+         display_names = {name: f"Speaker {i+1}" for i, (name, _, _) in enumerate(council_config)}
+     else:
+         display_names = {name: name for name, _, _ in council_config}
+
+     if verbose:
+         print(f"Council members: {council_names}")
+         if anonymous:
+             print("(Models see each other as Speaker 1, 2, etc. to prevent bias)")
+         print(f"Rounds: {rounds}")
+         print(f"Question: {question[:100]}{'...' if len(question) > 100 else ''}")
+         print()
+         print("=" * 60)
+         print("COUNCIL DELIBERATION")
+         print("=" * 60)
+         print()
+
+     conversation = []
+     output_parts = []
+
+     if blind_claims:
+         for name, model_name, claims in blind_claims:
+             output_parts.append(f"### {model_name} (blind)\n{claims}")
+
+     blind_context = ""
+     if blind_claims:
+         blind_lines = []
+         for name, _, claims in blind_claims:
+             dname = display_names[name]
+             blind_lines.append(f"**{dname}**: {sanitize_speaker_content(claims)}")
+         blind_context = "\n\n".join(blind_lines)
+
+     social_constraint = """
+
+ SOCIAL CALIBRATION: This is a social/conversational context (interview, networking, outreach).
+ Your output should feel natural in conversation - something you'd actually say over coffee.
+ Avoid structured, multi-part diagnostic questions that sound like interrogation.
+ Simple and human beats strategic and comprehensive. Optimize for being relatable, not thorough."""
+
+     devils_advocate_addition = """
+
+ SPECIAL ROLE: You are the DEVIL'S ADVOCATE. Your job is to push back HARD.
+
+ REQUIREMENTS:
+ 1. You MUST explicitly DISAGREE with at least one major point from the other speakers
+ 2. Identify the weakest assumption in the emerging consensus and attack it
+ 3. Consider: What would make this advice WRONG? What's the contrarian take?
+ 4. If everyone is converging too fast, that's a red flag — find the hidden complexity
+
+ Don't just "add nuance" or "build on" — find something to genuinely challenge.
+ If you can't find real disagreement, say why the consensus might be groupthink."""
+
+     first_speaker_with_blind = """You are {name}, speaking first in Round {round_num} of a council deliberation.
+
+ You've seen everyone's BLIND CLAIMS (their independent initial positions). Now engage:
+ 1. Reference at least ONE other speaker's blind claim
+ 2. Agree, disagree, or build on their position
+ 3. Develop your own position further based on what you've learned
+
+ Be direct. Challenge weak arguments. Don't be sycophantic."""
+
+     first_speaker_system = """You are {name}, speaking first in Round {round_num} of a council deliberation.
+
+ As the first speaker, stake a clear position on the question. Be specific and substantive so others can engage with your points.
+
+ End with 2-3 key claims that others should respond to."""
+
+     council_system = """You are {name}, participating in Round {round_num} of a council deliberation.
+
+ REQUIREMENTS for your response:
+ 1. Reference at least ONE previous speaker by name (e.g., "I agree with Speaker 1 that..." or "Speaker 2's point about X overlooks...")
+ 2. State explicitly: AGREE, DISAGREE, or BUILD ON their specific point
+ 3. Add ONE new consideration not yet raised
+ 4. Keep response under 250 words — be concise and practical
+
+ If you fully agree with emerging consensus, say: "CONSENSUS: [the agreed position]"
+
+ Previous speakers this round: {previous_speakers}
+
+ Be direct. Challenge weak arguments. Don't be sycophantic.
+ Prioritize PRACTICAL, ACTIONABLE advice over academic observations. Avoid jargon."""
+
+     for round_num in range(rounds):
+         round_speakers = []
+         for idx, (name, model, fallback) in enumerate(council_config):
+             dname = display_names[name]
+
+             if idx == 0 and round_num == 0:
+                 if blind_claims:
+                     system_prompt = first_speaker_with_blind.format(name=dname, round_num=round_num + 1)
+                 else:
+                     system_prompt = first_speaker_system.format(name=dname, round_num=round_num + 1)
+             else:
+                 if round_speakers:
+                     previous = ", ".join(round_speakers)
+                 else:
+                     previous = ", ".join([display_names[n] for n, _, _ in council_config])
+                 system_prompt = council_system.format(
+                     name=dname,
+                     round_num=round_num + 1,
+                     previous_speakers=previous
+                 )
+
+             if social_mode:
+                 system_prompt += social_constraint
+
+             if persona:
+                 system_prompt += f"""
+
+ IMPORTANT CONTEXT about the person asking:
+ {persona}
+
+ Factor this into your advice — don't just give strategically optimal answers, consider what fits THIS person."""
+
+             if idx == advocate_idx and round_num == 0:
+                 system_prompt += devils_advocate_addition
+
+             user_content = f"Question for the council:\n\n{question}"
+             if blind_context:
+                 user_content += f"\n\n---\n\nBLIND CLAIMS (independent initial positions):\n\n{blind_context}"
+
+             messages = [
+                 {"role": "system", "content": system_prompt},
+                 {"role": "user", "content": user_content},
+             ]
+
+             for speaker, text in conversation:
+                 speaker_dname = display_names[speaker]
+                 sanitized_text = sanitize_speaker_content(text)
+                 messages.append({
+                     "role": "assistant" if speaker == name else "user",
+                     "content": f"[{speaker_dname}]: {sanitized_text}" if speaker != name else sanitized_text,
+                 })
+
+             model_name = model.split("/")[-1]
+
+             if verbose:
+                 print(f"### {model_name}")
+                 if is_thinking_model(model):
+                     print("(thinking...)", flush=True)
+
+             response = query_model(api_key, model, messages, stream=verbose)
+
+             used_fallback = False
+             if response.startswith("[") and fallback:
+                 fallback_provider, fallback_model = fallback
+
+                 if fallback_provider == "google" and google_api_key:
+                     if verbose:
+                         print(f"(OpenRouter failed, trying AI Studio fallback: {fallback_model}...)", flush=True)
+                     response = query_google_ai_studio(google_api_key, fallback_model, messages)
+                     used_fallback = True
+                     model_name = fallback_model
+
+                 elif fallback_provider == "moonshot" and moonshot_api_key:
+                     if verbose:
+                         print(f"(OpenRouter failed, trying Moonshot fallback: {fallback_model}...)", flush=True)
+                     response = query_moonshot(moonshot_api_key, fallback_model, messages)
+                     used_fallback = True
+                     model_name = fallback_model
+
+             if verbose and (is_thinking_model(model) or used_fallback):
+                 print(response)
+
+             if response.startswith("["):
+                 failed_models.append(f"{model_name}: {response}")
+
+             conversation.append((name, response))
+             round_speakers.append(dname)
+
+             if verbose:
+                 print()
+
+             output_parts.append(f"### {model_name}\n{response}")
+
+         # Consensus is checked once per round (see README); break exits the rounds loop early
+         converged, reason = detect_consensus(conversation, len(council_config))
+         if converged:
+             if verbose:
+                 print(f">>> CONSENSUS DETECTED ({reason}) - proceeding to judge\n")
+             break
+
+     # Judge synthesis
+     context_hint = ""
+     if context:
+         context_hint = f"\n\nContext about this question: {context}\nConsider this context when weighing perspectives and forming recommendations."
+
+     social_judge_section = ""
+     if social_mode:
+         social_judge_section = """
+
+ ## Social Calibration Check
+ [Would the recommendation feel natural in conversation? Is it something you'd actually say, or does it sound like strategic over-optimization? If the council produced something too formal/structured, suggest a simpler, more human alternative.]"""
+
+     judge_system = f"""You are the Judge, responsible for synthesizing the council's deliberation.{context_hint}
+
+ After the council members have shared their perspectives, you:
+ 1. Identify points of AGREEMENT across all members
+ 2. Identify points of DISAGREEMENT and explain the different views
+ 3. Provide a SYNTHESIS that captures the council's collective wisdom
+ 4. Give a final RECOMMENDATION based on the deliberation
+ {"5. SOCIAL CALIBRATION: Check if the recommendation would feel natural in actual conversation" if social_mode else ""}
+
+ Format your response as:
+
+ ## Points of Agreement
+ [What the council agrees on]
+
+ ## Points of Disagreement
+ [Where views differ and why]
+
+ ## Synthesis
+ [The integrated perspective]
+
+ ## Recommendation
+ [Your final recommendation based on the deliberation]
+ {social_judge_section}
+ Be balanced and fair. Acknowledge minority views. Don't just pick a winner.{" For social contexts, prioritize natural/human output over strategic optimization." if social_mode else ""}
+
+ IMPORTANT: In your Recommendation, clearly distinguish:
+ - **Do Now** — practical actions the user can take immediately
+ - **Consider Later** — interesting ideas that require more infrastructure or scale
+
+ Don't recommend building infrastructure for problems that don't exist yet."""
+
+     deliberation_text = "\n\n".join(
+         f"**{display_names[speaker]}**: {sanitize_speaker_content(text)}" for speaker, text in conversation
+     )
+
+     judge_messages = [
+         {"role": "system", "content": judge_system},
+         {"role": "user", "content": f"Question:\n{question}\n\n---\n\nCouncil Deliberation:\n\n{deliberation_text}"},
+     ]
+
+     judge_model_name = JUDGE_MODEL.split("/")[-1]
+
+     if verbose:
+         print(f"### Judge ({judge_model_name})")
+
+     judge_response = query_model(api_key, JUDGE_MODEL, judge_messages, max_tokens=1200, stream=verbose)
+
+     if verbose:
+         print()
+
+     output_parts.append(f"### Judge ({judge_model_name})\n{judge_response}")
+
+     if anonymous:
+         final_output = "\n\n".join(output_parts)
+         for name, model, _ in council_config:
+             anon_name = display_names[name]
+             model_name = model.split("/")[-1]
+             final_output = final_output.replace(f"### {anon_name}", f"### {model_name}")
+             final_output = final_output.replace(f"[{anon_name}]", f"[{model_name}]")
+             final_output = final_output.replace(f"**{anon_name}**", f"**{model_name}**")
+             final_output = final_output.replace(f"with {anon_name}", f"with {model_name}")
+             final_output = final_output.replace(f"{anon_name}'s", f"{model_name}'s")
+         return final_output, failed_models
+
+     return "\n\n".join(output_parts), failed_models
frontier_council-0.1.0.dist-info/METADATA
@@ -0,0 +1,150 @@
+ Metadata-Version: 2.4
+ Name: frontier-council
+ Version: 0.1.0
+ Summary: Multi-model deliberation for important decisions. 5 frontier LLMs debate, then a judge synthesizes consensus.
+ Project-URL: Homepage, https://github.com/terry-li-hm/skills
+ Author-email: Terry Li <terry.li.hm@gmail.com>
+ License-Expression: MIT
+ Keywords: ai,council,debate,deliberation,frontier,llm,multi-model,openrouter
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Environment :: Console
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Requires-Python: >=3.11
+ Requires-Dist: httpx>=0.25.0
+ Description-Content-Type: text/markdown
+
+ # Frontier Council
+
+ Multi-model deliberation for important decisions. 5 frontier LLMs debate a question, then a judge synthesizes consensus.
+
+ Inspired by [Andrej Karpathy's LLM Council](https://github.com/karpathy/llm-council), with added blind phase (anti-anchoring), explicit engagement requirements, devil's advocate role, and social calibration mode.
+
+ ## Models
+
+ - Claude (claude-opus-4.5)
+ - GPT (gpt-5.2-pro)
+ - Gemini (gemini-3-pro-preview)
+ - Grok (grok-4)
+ - Kimi (kimi-k2.5)
+ - Judge: Claude Opus 4.5
+
+ ## Installation
+
+ ```bash
+ pip install frontier-council
+ ```
+
+ Or with uv:
+ ```bash
+ uv tool install frontier-council
+ ```
+
+ ## Setup
+
+ Set your OpenRouter API key:
+ ```bash
+ export OPENROUTER_API_KEY=sk-or-v1-...
+ ```
+
+ Optional fallback keys (for flaky models):
+ ```bash
+ export GOOGLE_API_KEY=AIza...   # Gemini fallback
+ export MOONSHOT_API_KEY=sk-...  # Kimi fallback
+ ```
+
+ ## Usage
+
+ ```bash
+ # Basic question
+ frontier-council "Should we use microservices or monolith?"
+
+ # With social calibration (for interview/networking questions)
+ frontier-council "What questions should I ask in the interview?" --social
+
+ # With persona context
+ frontier-council "Should I take the job?" --persona "builder who hates process work"
+
+ # Multiple rounds
+ frontier-council "Architecture decision" --rounds 3
+
+ # Save transcript
+ frontier-council "Career question" --output transcript.md
+
+ # Share via GitHub Gist
+ frontier-council "Important decision" --share
+ ```
+
+ ## Options
+
+ | Flag | Description |
+ |------|-------------|
+ | `--rounds N` | Number of deliberation rounds (default: 2, exits early on consensus) |
+ | `--output FILE` | Save transcript to file |
+ | `--named` | Let models see real names during deliberation (may increase bias) |
+ | `--no-blind` | Skip blind first-pass (faster, but first speaker anchors others) |
+ | `--context TEXT` | Context hint for judge (e.g., "architecture decision") |
+ | `--share` | Upload transcript to secret GitHub Gist |
+ | `--social` | Enable social calibration mode (auto-detected for interview/networking) |
+ | `--persona TEXT` | Context about the person asking |
+ | `--advocate N` | Which speaker (1-5) should be devil's advocate (default: random) |
+ | `--quiet` | Suppress progress output |
+
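+ Social auto-detection is a keyword heuristic over the question text; since `detect_social_context` is exported, it can be probed directly (a small illustration):
+
+ ```python
+ from frontier_council import detect_social_context
+
+ detect_social_context("What questions should I ask in the interview?")  # True ("interview")
+ detect_social_context("Should we use microservices or monolith?")       # False
+ ```
+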
+ ## How It Works
+
+ **Blind First-Pass (Anti-Anchoring):**
+ 1. All models generate short "claim sketches" independently and in parallel
+ 2. This prevents the "first speaker lottery" where whoever speaks first anchors the debate
+ 3. Each model commits to an initial position before seeing any other responses
+
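+ The blind phase can also be driven on its own; a minimal sketch (the coroutine is exported, so it only needs an event loop):
+
+ ```python
+ import asyncio
+ import os
+
+ from frontier_council import COUNCIL, run_blind_phase_parallel
+
+ claims = asyncio.run(run_blind_phase_parallel(
+     question="Should we use microservices or monolith?",
+     council_config=COUNCIL,
+     api_key=os.environ["OPENROUTER_API_KEY"],
+     verbose=False,
+ ))
+ for name, model_name, claim in claims:
+     print(f"{model_name}: {claim[:80]}")
+ ```
+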
+ **Deliberation Protocol:**
+ 1. All models see everyone's blind claims, then deliberate
+ 2. Each model MUST explicitly AGREE, DISAGREE, or BUILD ON previous speakers by name
+ 3. After each round, the system checks for consensus (4/5 agreement triggers early exit)
+ 4. Judge synthesizes the full deliberation
+
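+ The consensus check is a plain-text heuristic, not a vote. A sketch of how it behaves (`detect_consensus` lives in `frontier_council.council` and is not part of the public `__all__`):
+
+ ```python
+ from frontier_council.council import detect_consensus
+
+ # Last round of responses: four of five carry an explicit consensus signal
+ conversation = [
+     ("Claude", "CONSENSUS: start with a monolith"),
+     ("GPT", "CONSENSUS: start with a monolith"),
+     ("Gemini", "I still see risk in the migration story."),
+     ("Grok", "CONSENSUS: start with a monolith"),
+     ("Kimi", "CONSENSUS: start with a monolith"),
+ ]
+
+ converged, reason = detect_consensus(conversation, council_size=5)
+ print(converged, reason)  # True explicit consensus signals
+ ```
+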
+ **Anonymous Deliberation:**
+ - Models see each other as "Speaker 1", "Speaker 2", etc. during deliberation
+ - Prevents models from playing favorites based on vendor reputation
+ - Output transcript shows real model names for readability
+
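+ The anonymization is a simple index-based rename; this mirrors the mapping `run_council` builds internally:
+
+ ```python
+ from frontier_council import COUNCIL
+
+ # Same construction run_council uses in anonymous mode
+ display_names = {name: f"Speaker {i + 1}" for i, (name, _, _) in enumerate(COUNCIL)}
+ print(display_names["Claude"])  # Speaker 1
+ ```
+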
+ ## When to Use
+
+ Use the council when:
+ - Making an important decision that benefits from diverse perspectives
+ - You want models to actually debate, not just answer in parallel
+ - You need a synthesized recommendation, not raw comparison
+ - Exploring trade-offs where different viewpoints matter
+
+ Skip the council when:
+ - You're just thinking out loud (exploratory discussions)
+ - The answer depends on personal preference more than objective trade-offs
+ - Speed matters (council takes 60-90 seconds)
+
+ ## Python API
+
+ ```python
+ from frontier_council import run_council, COUNCIL
+ import os
+
+ api_key = os.environ["OPENROUTER_API_KEY"]
+
+ transcript, failed_models = run_council(
+     question="Should we use microservices or monolith?",
+     council_config=COUNCIL,
+     api_key=api_key,
+     rounds=2,
+     verbose=True,
+     social_mode=False,
+ )
+
+ print(transcript)
+ ```
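+
+ `failed_models` lists any members whose queries errored (entries look like `model-name: [Error: ...]`), so callers can check whether the synthesis still rests on enough voices:
+
+ ```python
+ if failed_models:
+     print(f"{len(failed_models)} council responses failed:")
+     for failure in failed_models:
+         print(" -", failure)
+ ```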
+
+ ## License
+
+ MIT
frontier_council-0.1.0.dist-info/RECORD
@@ -0,0 +1,7 @@
+ frontier_council/__init__.py,sha256=VfpeWHijQ1z8zd_ecKPIjI6S9VT3E6yAQ1PVt8-eExU,357
+ frontier_council/cli.py,sha256=BbM1cHWjAA0DBn4uyGKajaNAMxhKYQt-ZGNQ6hTnlkc,7337
+ frontier_council/council.py,sha256=u2ir34dNostBOhXUi1R0wFEfBIEgiRX8thiS5lRFnnU,30226
+ frontier_council-0.1.0.dist-info/METADATA,sha256=hZGQzWU0DUtuexLkKR4sXVHFjDbw75bYg02uQ8dFAB4,4792
+ frontier_council-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ frontier_council-0.1.0.dist-info/entry_points.txt,sha256=I3xjPK-nupfQz5PANVXUnXjuxlP-4-mykkA3wXhFOGY,63
+ frontier_council-0.1.0.dist-info/RECORD,,
frontier_council-0.1.0.dist-info/WHEEL
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.28.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
frontier_council-0.1.0.dist-info/entry_points.txt
@@ -0,0 +1,2 @@
+ [console_scripts]
+ frontier-council = frontier_council.cli:main