frontier-council 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- frontier_council/__init__.py +1 -1
- frontier_council/cli.py +97 -6
- frontier_council/council.py +271 -36
- frontier_council/schema.py +27 -0
- {frontier_council-0.1.2.dist-info → frontier_council-0.2.0.dist-info}/METADATA +19 -8
- frontier_council-0.2.0.dist-info/RECORD +9 -0
- frontier_council-0.1.2.dist-info/RECORD +0 -8
- {frontier_council-0.1.2.dist-info → frontier_council-0.2.0.dist-info}/WHEEL +0 -0
- {frontier_council-0.1.2.dist-info → frontier_council-0.2.0.dist-info}/entry_points.txt +0 -0
- {frontier_council-0.1.2.dist-info → frontier_council-0.2.0.dist-info}/licenses/LICENSE +0 -0
frontier_council/__init__.py
CHANGED
frontier_council/cli.py
CHANGED
|
@@ -29,6 +29,8 @@ from .council import (
|
|
|
29
29
|
COUNCIL,
|
|
30
30
|
detect_social_context,
|
|
31
31
|
run_council,
|
|
32
|
+
DOMAIN_CONTEXTS,
|
|
33
|
+
run_followup_discussion,
|
|
32
34
|
)
|
|
33
35
|
|
|
34
36
|
|
|
@@ -42,9 +44,10 @@ Examples:
|
|
|
42
44
|
frontier-council "What questions should I ask?" --social
|
|
43
45
|
frontier-council "Career decision" --persona "builder who hates process work"
|
|
44
46
|
frontier-council "Architecture choice" --rounds 3 --output transcript.md
|
|
47
|
+
frontier-council "Decision" --domain banking --followup --output counsel.md
|
|
45
48
|
""",
|
|
46
49
|
)
|
|
47
|
-
parser.add_argument("question", help="The question for the council to deliberate")
|
|
50
|
+
parser.add_argument("question", nargs="?", help="The question for the council to deliberate")
|
|
48
51
|
parser.add_argument(
|
|
49
52
|
"--rounds",
|
|
50
53
|
type=int,
|
|
@@ -74,6 +77,12 @@ Examples:
|
|
|
74
77
|
"--context", "-c",
|
|
75
78
|
help="Context hint for the judge (e.g., 'architecture decision', 'ethics question')",
|
|
76
79
|
)
|
|
80
|
+
parser.add_argument(
|
|
81
|
+
"--format", "-f",
|
|
82
|
+
choices=["json", "yaml", "prose"],
|
|
83
|
+
default="prose",
|
|
84
|
+
help="Output format: json (machine-parseable), yaml (structured), prose (default)",
|
|
85
|
+
)
|
|
77
86
|
parser.add_argument(
|
|
78
87
|
"--share",
|
|
79
88
|
action="store_true",
|
|
@@ -92,7 +101,20 @@ Examples:
|
|
|
92
101
|
"--advocate",
|
|
93
102
|
type=int,
|
|
94
103
|
choices=[1, 2, 3, 4, 5],
|
|
95
|
-
help="
|
|
104
|
+
help="DEPRECATED: Use --challenger instead. Maps to --challenger by model name.",
|
|
105
|
+
)
|
|
106
|
+
parser.add_argument(
|
|
107
|
+
"--domain",
|
|
108
|
+
help="Regulatory domain context (banking, healthcare, eu, fintech, bio)",
|
|
109
|
+
)
|
|
110
|
+
parser.add_argument(
|
|
111
|
+
"--challenger",
|
|
112
|
+
help="Which model should argue contrarian (claude, gpt, gemini, grok, kimi). Default: claude",
|
|
113
|
+
)
|
|
114
|
+
parser.add_argument(
|
|
115
|
+
"--followup",
|
|
116
|
+
action="store_true",
|
|
117
|
+
help="Enable followup mode to drill into specific points after judge synthesis",
|
|
96
118
|
)
|
|
97
119
|
parser.add_argument(
|
|
98
120
|
"--no-save",
|
|
@@ -121,12 +143,44 @@ Examples:
|
|
|
121
143
|
print(f"\n ... and {len(sessions) - 20} more")
|
|
122
144
|
sys.exit(0)
|
|
123
145
|
|
|
146
|
+
# Require question for normal operation
|
|
147
|
+
if not args.question:
|
|
148
|
+
parser.error("the following arguments are required: question")
|
|
149
|
+
|
|
124
150
|
# Auto-detect social context if not explicitly set
|
|
125
151
|
social_mode = args.social or detect_social_context(args.question)
|
|
126
152
|
if social_mode and not args.social and not args.quiet:
|
|
127
153
|
print("(Auto-detected social context - enabling social calibration mode)")
|
|
128
154
|
print()
|
|
129
155
|
|
|
156
|
+
# Validate and resolve domain
|
|
157
|
+
domain_context = None
|
|
158
|
+
if args.domain:
|
|
159
|
+
if args.domain.lower() not in DOMAIN_CONTEXTS:
|
|
160
|
+
print(f"Error: Unknown domain '{args.domain}'. Valid domains: {', '.join(DOMAIN_CONTEXTS.keys())}", file=sys.stderr)
|
|
161
|
+
sys.exit(1)
|
|
162
|
+
domain_context = args.domain.lower()
|
|
163
|
+
|
|
164
|
+
# Resolve challenger model
|
|
165
|
+
challenger_idx = None
|
|
166
|
+
if args.challenger:
|
|
167
|
+
challenger_lower = args.challenger.lower()
|
|
168
|
+
model_name_map = {n.lower(): i for i, (n, _, _) in enumerate(COUNCIL)}
|
|
169
|
+
if challenger_lower not in model_name_map:
|
|
170
|
+
print(f"Error: Unknown model '{args.challenger}'. Valid models: {', '.join(n for n, _, _ in COUNCIL)}", file=sys.stderr)
|
|
171
|
+
sys.exit(1)
|
|
172
|
+
challenger_idx = model_name_map[challenger_lower]
|
|
173
|
+
elif args.domain:
|
|
174
|
+
# Default challenger: GPT (index 0) when domain is set
|
|
175
|
+
# Reasoning: Grok is naturally contrarian anyway, so assigning GPT as challenger
|
|
176
|
+
# gives you two sources of pushback
|
|
177
|
+
challenger_idx = 0
|
|
178
|
+
|
|
179
|
+
if not args.quiet and challenger_idx is not None:
|
|
180
|
+
challenger_name = COUNCIL[challenger_idx][0]
|
|
181
|
+
print(f"(Contrainian challenger: {challenger_name})")
|
|
182
|
+
print()
|
|
183
|
+
|
|
130
184
|
# Get API keys
|
|
131
185
|
api_key = os.environ.get("OPENROUTER_API_KEY")
|
|
132
186
|
if not api_key:
|
|
@@ -155,14 +209,28 @@ Examples:
|
|
|
155
209
|
print()
|
|
156
210
|
|
|
157
211
|
try:
|
|
158
|
-
|
|
212
|
+
# Handle deprecated --advocate flag
|
|
213
|
+
if args.advocate:
|
|
214
|
+
print("Warning: --advocate is deprecated. Use --challenger instead.", file=sys.stderr)
|
|
215
|
+
model_names = [n for n, _, _ in COUNCIL]
|
|
216
|
+
mapped_model = model_names[args.advocate - 1].lower()
|
|
217
|
+
print(f" Mapping --advocate {args.advocate} to --challenger {mapped_model}", file=sys.stderr)
|
|
218
|
+
if not args.challenger:
|
|
219
|
+
args.challenger = mapped_model
|
|
220
|
+
# Re-resolve challenger_idx after mapping
|
|
221
|
+
challenger_lower = args.challenger.lower()
|
|
222
|
+
model_name_map = {n.lower(): i for i, (n, _, _) in enumerate(COUNCIL)}
|
|
223
|
+
challenger_idx = model_name_map.get(challenger_lower, 0)
|
|
159
224
|
|
|
160
225
|
if not args.quiet and args.persona:
|
|
161
226
|
print(f"(Persona context: {args.persona})")
|
|
162
227
|
print()
|
|
228
|
+
|
|
229
|
+
# Show starting challenger (now rotates each round)
|
|
163
230
|
if not args.quiet:
|
|
164
|
-
|
|
165
|
-
|
|
231
|
+
starting_challenger_idx = challenger_idx if challenger_idx is not None else 0
|
|
232
|
+
starting_challenger_name = COUNCIL[starting_challenger_idx][0]
|
|
233
|
+
print(f"(Starting challenger: {starting_challenger_name}, rotates each round)")
|
|
166
234
|
print()
|
|
167
235
|
|
|
168
236
|
transcript, failed_models = run_council(
|
|
@@ -178,9 +246,32 @@ Examples:
|
|
|
178
246
|
context=args.context,
|
|
179
247
|
social_mode=social_mode,
|
|
180
248
|
persona=args.persona,
|
|
181
|
-
|
|
249
|
+
domain=domain_context,
|
|
250
|
+
challenger_idx=challenger_idx,
|
|
251
|
+
format=args.format,
|
|
182
252
|
)
|
|
183
253
|
|
|
254
|
+
# Followup mode
|
|
255
|
+
followup_transcript = ""
|
|
256
|
+
if args.followup and not args.quiet:
|
|
257
|
+
print("\n" + "=" * 60)
|
|
258
|
+
print("Enter topic to explore further (or 'done'): ", end="", flush=True)
|
|
259
|
+
topic = input().strip()
|
|
260
|
+
|
|
261
|
+
if topic and topic.lower() != "done":
|
|
262
|
+
domain_ctxt = DOMAIN_CONTEXTS.get(domain_context, "") if domain_context else ""
|
|
263
|
+
followup_transcript = run_followup_discussion(
|
|
264
|
+
question=args.question,
|
|
265
|
+
topic=topic,
|
|
266
|
+
council_config=COUNCIL,
|
|
267
|
+
api_key=api_key,
|
|
268
|
+
domain_context=domain_ctxt,
|
|
269
|
+
social_mode=social_mode,
|
|
270
|
+
persona=args.persona,
|
|
271
|
+
verbose=not args.quiet,
|
|
272
|
+
)
|
|
273
|
+
transcript += "\n\n" + followup_transcript
|
|
274
|
+
|
|
184
275
|
# Print failure summary
|
|
185
276
|
if failed_models and not args.quiet:
|
|
186
277
|
print()
|
frontier_council/council.py
CHANGED
|
@@ -4,6 +4,9 @@ import asyncio
|
|
|
4
4
|
import httpx
|
|
5
5
|
import json
|
|
6
6
|
import re
|
|
7
|
+
import time
|
|
8
|
+
import yaml
|
|
9
|
+
from datetime import datetime
|
|
7
10
|
from pathlib import Path
|
|
8
11
|
|
|
9
12
|
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
|
|
@@ -14,15 +17,24 @@ MOONSHOT_URL = "https://api.moonshot.cn/v1/chat/completions"
|
|
|
14
17
|
# Format: (name, openrouter_model, fallback) - fallback is (provider, model) or None
|
|
15
18
|
# Providers: "google" = AI Studio, "moonshot" = Moonshot API
|
|
16
19
|
COUNCIL = [
|
|
17
|
-
("Claude", "anthropic/claude-opus-4.5", None),
|
|
18
20
|
("GPT", "openai/gpt-5.2-pro", None),
|
|
19
21
|
("Gemini", "google/gemini-3-pro-preview", ("google", "gemini-2.5-pro")),
|
|
20
22
|
("Grok", "x-ai/grok-4", None),
|
|
21
23
|
("Kimi", "moonshotai/kimi-k2.5", ("moonshot", "kimi-k2.5")),
|
|
22
24
|
]
|
|
23
25
|
|
|
26
|
+
# Claude is judge-only (not in council) to avoid conflict of interest
|
|
24
27
|
JUDGE_MODEL = "anthropic/claude-opus-4.5"
|
|
25
28
|
|
|
29
|
+
# Domain-specific regulatory contexts
|
|
30
|
+
DOMAIN_CONTEXTS = {
|
|
31
|
+
"banking": "You are operating in a banking/financial services regulatory environment. Consider: HKMA/MAS/FCA requirements, Model Risk Management (MRM) expectations, audit trail needs, BCBS 239 governance, explainability requirements, documentation standards, and regulatory scrutiny levels.",
|
|
32
|
+
"healthcare": "You are operating in a healthcare regulatory environment. Consider: HIPAA constraints on PHI handling, FDA requirements for medical devices, clinical validation expectations, interoperability standards (FHIR), GxP compliance, and patient safety requirements.",
|
|
33
|
+
"eu": "You are operating in the EU regulatory environment. Consider: GDPR data protection requirements, EU AI Act risk categorization, Digital Markets Act compliance, cross-border data transfer rules (Schrems II), and EU data localization expectations.",
|
|
34
|
+
"fintech": "You are operating in a fintech regulatory environment. Consider: KYC/AML requirements, PSD2 banking regulations, e-money licensing expectations, payment services directive compliance, and financial consumer protection rules.",
|
|
35
|
+
"bio": "You are operating in a biotech/pharma regulatory environment. Consider: FDA/EMA drug approval processes, GMP manufacturing requirements, clinical trial design expectations, pharmacovigilance obligations, and post-market surveillance requirements.",
|
|
36
|
+
}
|
|
37
|
+
|
|
26
38
|
# Keywords that suggest social/conversational context (auto-detect)
|
|
27
39
|
SOCIAL_KEYWORDS = [
|
|
28
40
|
"interview", "ask him", "ask her", "ask them", "question to ask",
|
|
@@ -31,9 +43,12 @@ SOCIAL_KEYWORDS = [
|
|
|
31
43
|
"what should i say", "how should i respond", "conversation",
|
|
32
44
|
]
|
|
33
45
|
|
|
34
|
-
# Thinking models
|
|
46
|
+
# Thinking models - use non-streaming, higher tokens, longer timeout
|
|
35
47
|
THINKING_MODEL_SUFFIXES = {
|
|
48
|
+
"claude-opus-4.5",
|
|
49
|
+
"gpt-5.2-pro",
|
|
36
50
|
"gemini-3-pro-preview",
|
|
51
|
+
"grok-4",
|
|
37
52
|
"kimi-k2.5",
|
|
38
53
|
"deepseek-r1",
|
|
39
54
|
"o1-preview", "o1-mini", "o1",
|
|
@@ -440,6 +455,7 @@ async def run_blind_phase_parallel(
|
|
|
440
455
|
moonshot_api_key: str | None = None,
|
|
441
456
|
verbose: bool = True,
|
|
442
457
|
persona: str | None = None,
|
|
458
|
+
domain_context: str = "",
|
|
443
459
|
) -> list[tuple[str, str, str]]:
|
|
444
460
|
"""Parallel blind first-pass: all models stake claims simultaneously."""
|
|
445
461
|
blind_system = """You are participating in the BLIND PHASE of a council deliberation.
|
|
@@ -454,6 +470,13 @@ Provide a CLAIM SKETCH (not a full response):
|
|
|
454
470
|
|
|
455
471
|
Keep it concise (~100 words). The full deliberation comes later."""
|
|
456
472
|
|
|
473
|
+
if domain_context:
|
|
474
|
+
blind_system += f"""
|
|
475
|
+
|
|
476
|
+
DOMAIN CONTEXT: {domain_context}
|
|
477
|
+
|
|
478
|
+
Apply this regulatory domain context to your analysis."""
|
|
479
|
+
|
|
457
480
|
if persona:
|
|
458
481
|
blind_system += f"""
|
|
459
482
|
|
|
@@ -519,28 +542,188 @@ def sanitize_speaker_content(content: str) -> str:
|
|
|
519
542
|
return sanitized
|
|
520
543
|
|
|
521
544
|
|
|
522
|
-
def detect_consensus(
|
|
523
|
-
|
|
545
|
+
def detect_consensus(
|
|
546
|
+
conversation: list[tuple[str, str]],
|
|
547
|
+
council_config: list[tuple[str, str, tuple[str, str] | None]],
|
|
548
|
+
current_challenger_idx: int | None = None,
|
|
549
|
+
) -> tuple[bool, str]:
|
|
550
|
+
"""Detect if council has converged. Returns (converged, reason).
|
|
551
|
+
|
|
552
|
+
Excludes the current challenger from consensus count since they're
|
|
553
|
+
structurally incentivized to disagree.
|
|
554
|
+
"""
|
|
555
|
+
council_size = len(council_config)
|
|
556
|
+
|
|
524
557
|
if len(conversation) < council_size:
|
|
525
558
|
return False, "insufficient responses"
|
|
526
559
|
|
|
527
|
-
recent =
|
|
560
|
+
recent = conversation[-council_size:]
|
|
528
561
|
|
|
529
|
-
|
|
530
|
-
if
|
|
562
|
+
# Exclude challenger from consensus count
|
|
563
|
+
if current_challenger_idx is not None:
|
|
564
|
+
challenger_name = council_config[current_challenger_idx][0]
|
|
565
|
+
recent = [(name, text) for name, text in recent if name != challenger_name]
|
|
566
|
+
|
|
567
|
+
effective_size = len(recent)
|
|
568
|
+
if effective_size == 0:
|
|
569
|
+
return False, "no non-challenger responses"
|
|
570
|
+
|
|
571
|
+
threshold = effective_size - 1 # Need all-but-one non-challengers to agree
|
|
572
|
+
|
|
573
|
+
consensus_count = sum(1 for _, text in recent if "CONSENSUS:" in text.upper())
|
|
574
|
+
if consensus_count >= threshold:
|
|
531
575
|
return True, "explicit consensus signals"
|
|
532
576
|
|
|
533
577
|
agreement_phrases = ["i agree with", "i concur", "we all agree", "consensus emerging"]
|
|
534
578
|
agreement_count = sum(
|
|
535
|
-
1 for text in recent
|
|
579
|
+
1 for _, text in recent
|
|
536
580
|
if any(phrase in text.lower() for phrase in agreement_phrases)
|
|
537
581
|
)
|
|
538
|
-
if agreement_count >=
|
|
582
|
+
if agreement_count >= threshold:
|
|
539
583
|
return True, "agreement language detected"
|
|
540
584
|
|
|
541
585
|
return False, "no consensus"
|
|
542
586
|
|
|
543
587
|
|
|
588
|
+
def extract_structured_summary(
    judge_response: str,
    question: str,
    models_used: list[str],
    rounds: int,
    duration: float,
    cost: float,
) -> dict:
    """Build a machine-readable summary dict from the judge's free-text response.

    The response is scanned line by line with simple keyword heuristics.
    Each line falls into at most one bucket, checked in this order:
    decision, dissent, action item. Blank lines and markdown headings
    (lines starting with ``#``) are ignored, so section titles such as
    ``## Recommendation`` or ``## Points of Disagreement`` are not mistaken
    for content.

    Args:
        judge_response: Full text returned by the judge model.
        question: The question that was deliberated; copied through unchanged.
        models_used: Names of the council models; copied into ``meta``.
        rounds: Number of rounds; copied into ``meta``.
        duration: Elapsed seconds; copied into ``meta``.
        cost: Estimated cost in USD; copied into ``meta``.

    Returns:
        A dict with keys ``schema_version``, ``question``, ``decision``,
        ``confidence``, ``reasoning_summary``, ``dissents``, ``action_items``
        and ``meta``. ``decision`` is capped at 500 characters,
        ``reasoning_summary`` at 1000, and both lists at 5 entries.
    """
    # Normalise once: strip each line, drop blanks and markdown headings.
    content_lines = [
        stripped
        for stripped in (raw.strip() for raw in judge_response.split("\n"))
        if stripped and not stripped.startswith("#")
    ]

    decision = ""
    # No confidence signal is extracted from the text; this is a fixed value.
    confidence = "medium"
    dissents = []
    action_items = []

    for line in content_lines:
        lowered = line.lower()
        if "recommend" in lowered or "decision:" in lowered:
            # Later matches overwrite earlier ones: the last one wins.
            decision = line
        elif "dissent" in lowered or "disagree" in lowered:
            dissents.append({"model": "Unknown", "concern": line})
        elif "action" in lowered or "next step" in lowered:
            action_items.append({"action": line, "priority": "medium"})

    if not decision:
        # Fall back to the first reasonably long content line.
        decision = next((line for line in content_lines if len(line) > 20), "")

    return {
        "schema_version": "1.0",
        "question": question,
        "decision": decision[:500] if decision else "See transcript for details",
        "confidence": confidence,
        "reasoning_summary": judge_response[:1000],
        "dissents": dissents[:5],
        "action_items": action_items[:5],
        "meta": {
            "timestamp": datetime.now().isoformat(),
            "models_used": models_used,
            "rounds": rounds,
            "duration_seconds": duration,
            "estimated_cost_usd": cost,
        },
    }
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
def run_followup_discussion(
|
|
638
|
+
question: str,
|
|
639
|
+
topic: str,
|
|
640
|
+
council_config: list[tuple[str, str, tuple[str, str] | None]],
|
|
641
|
+
api_key: str,
|
|
642
|
+
domain_context: str = "",
|
|
643
|
+
social_mode: bool = False,
|
|
644
|
+
persona: str | None = None,
|
|
645
|
+
verbose: bool = True,
|
|
646
|
+
) -> str:
|
|
647
|
+
"""Run a focused followup discussion on a specific topic with 2 models. Returns the followup transcript."""
|
|
648
|
+
# Use judge (Claude 0) and one other model (GPT 1) for followup
|
|
649
|
+
followup_models = council_config[:2] # Claude and GPT
|
|
650
|
+
|
|
651
|
+
followup_transcript_parts = []
|
|
652
|
+
|
|
653
|
+
if verbose:
|
|
654
|
+
print()
|
|
655
|
+
print("=" * 60)
|
|
656
|
+
print(f"FOLLOWUP: {topic}")
|
|
657
|
+
print("=" * 60)
|
|
658
|
+
print()
|
|
659
|
+
|
|
660
|
+
social_constraint = """
|
|
661
|
+
|
|
662
|
+
SOCIAL CALIBRATION: This is a social/conversational context (interview, networking, outreach).
|
|
663
|
+
Your output should feel natural in conversation - something you'd actually say over coffee.
|
|
664
|
+
Avoid structured, multi-part diagnostic questions that sound like interrogation.
|
|
665
|
+
Simple and human beats strategic and comprehensive. Optimize for being relatable, not thorough.""" if social_mode else ""
|
|
666
|
+
|
|
667
|
+
followup_parts = [
|
|
668
|
+
"You are participating in a FOCUSED FOLLOWUP discussion on a specific topic.",
|
|
669
|
+
"",
|
|
670
|
+
f"The main council has concluded, and we're now drilling down into:",
|
|
671
|
+
f"TOPIC: {topic}",
|
|
672
|
+
"",
|
|
673
|
+
"Keep your response focused on this specific topic. Don't rehash the full council deliberation.",
|
|
674
|
+
"Be concise and practical.",
|
|
675
|
+
"",
|
|
676
|
+
]
|
|
677
|
+
|
|
678
|
+
if social_constraint:
|
|
679
|
+
followup_parts.append(social_constraint.strip())
|
|
680
|
+
|
|
681
|
+
if persona:
|
|
682
|
+
followup_parts.extend([
|
|
683
|
+
"",
|
|
684
|
+
"IMPORTANT CONTEXT about the person asking:",
|
|
685
|
+
persona,
|
|
686
|
+
"",
|
|
687
|
+
"Factor this into your advice — don't just give strategically optimal answers, consider what fits THIS person.",
|
|
688
|
+
])
|
|
689
|
+
|
|
690
|
+
if domain_context:
|
|
691
|
+
followup_parts.extend([
|
|
692
|
+
"",
|
|
693
|
+
f"DOMAIN CONTEXT: {domain_context}",
|
|
694
|
+
"",
|
|
695
|
+
"Apply this regulatory domain context to your analysis.",
|
|
696
|
+
])
|
|
697
|
+
|
|
698
|
+
followup_system = "\n".join(followup_parts)
|
|
699
|
+
|
|
700
|
+
followup_transcript_parts.append(f"### Followup Discussion: {topic}\n")
|
|
701
|
+
|
|
702
|
+
for i, (name, model, fallback) in enumerate(followup_models):
|
|
703
|
+
messages = [
|
|
704
|
+
{"role": "system", "content": followup_system},
|
|
705
|
+
{"role": "user", "content": f"Original Question:\n\n{question}\n\nFocus your response on: {topic}"},
|
|
706
|
+
]
|
|
707
|
+
|
|
708
|
+
if verbose:
|
|
709
|
+
print(f"### {name}")
|
|
710
|
+
|
|
711
|
+
response = query_model(api_key, model, messages, stream=verbose)
|
|
712
|
+
|
|
713
|
+
if verbose:
|
|
714
|
+
print()
|
|
715
|
+
|
|
716
|
+
followup_transcript_parts.append(f"### {name}\n{response}\n")
|
|
717
|
+
|
|
718
|
+
if verbose:
|
|
719
|
+
print("=" * 60)
|
|
720
|
+
print("FOLLOWUP COMPLETE")
|
|
721
|
+
print("=" * 60)
|
|
722
|
+
print()
|
|
723
|
+
|
|
724
|
+
return "\n\n".join(followup_transcript_parts)
|
|
725
|
+
|
|
726
|
+
|
|
544
727
|
def run_council(
|
|
545
728
|
question: str,
|
|
546
729
|
council_config: list[tuple[str, str, tuple[str, str] | None]],
|
|
@@ -554,17 +737,22 @@ def run_council(
|
|
|
554
737
|
context: str | None = None,
|
|
555
738
|
social_mode: bool = False,
|
|
556
739
|
persona: str | None = None,
|
|
557
|
-
|
|
740
|
+
domain: str | None = None,
|
|
741
|
+
challenger_idx: int | None = None, # Starting challenger index, rotates each round
|
|
742
|
+
format: str = "prose",
|
|
558
743
|
) -> tuple[str, list[str]]:
|
|
559
744
|
"""Run the council deliberation. Returns (transcript, failed_models)."""
|
|
560
745
|
|
|
746
|
+
start_time = time.time()
|
|
747
|
+
|
|
748
|
+
domain_context = DOMAIN_CONTEXTS.get(domain, "") if domain else ""
|
|
561
749
|
council_names = [name for name, _, _ in council_config]
|
|
562
750
|
blind_claims = []
|
|
563
751
|
failed_models = []
|
|
564
752
|
|
|
565
753
|
if blind:
|
|
566
754
|
blind_claims = asyncio.run(run_blind_phase_parallel(
|
|
567
|
-
question, council_config, api_key, google_api_key, moonshot_api_key, verbose, persona
|
|
755
|
+
question, council_config, api_key, google_api_key, moonshot_api_key, verbose, persona, domain_context
|
|
568
756
|
))
|
|
569
757
|
for name, model_name, claims in blind_claims:
|
|
570
758
|
if claims.startswith("["):
|
|
@@ -580,6 +768,8 @@ def run_council(
|
|
|
580
768
|
if anonymous:
|
|
581
769
|
print("(Models see each other as Speaker 1, 2, etc. to prevent bias)")
|
|
582
770
|
print(f"Rounds: {rounds}")
|
|
771
|
+
if domain:
|
|
772
|
+
print(f"Domain context: {domain}")
|
|
583
773
|
print(f"Question: {question[:100]}{'...' if len(question) > 100 else ''}")
|
|
584
774
|
print()
|
|
585
775
|
print("=" * 60)
|
|
@@ -589,6 +779,7 @@ def run_council(
|
|
|
589
779
|
|
|
590
780
|
conversation = []
|
|
591
781
|
output_parts = []
|
|
782
|
+
current_round = 0
|
|
592
783
|
|
|
593
784
|
if blind_claims:
|
|
594
785
|
for name, model_name, claims in blind_claims:
|
|
@@ -609,19 +800,6 @@ Your output should feel natural in conversation - something you'd actually say o
|
|
|
609
800
|
Avoid structured, multi-part diagnostic questions that sound like interrogation.
|
|
610
801
|
Simple and human beats strategic and comprehensive. Optimize for being relatable, not thorough."""
|
|
611
802
|
|
|
612
|
-
devils_advocate_addition = """
|
|
613
|
-
|
|
614
|
-
SPECIAL ROLE: You are the DEVIL'S ADVOCATE. Your job is to push back HARD.
|
|
615
|
-
|
|
616
|
-
REQUIREMENTS:
|
|
617
|
-
1. You MUST explicitly DISAGREE with at least one major point from the other speakers
|
|
618
|
-
2. Identify the weakest assumption in the emerging consensus and attack it
|
|
619
|
-
3. Consider: What would make this advice WRONG? What's the contrarian take?
|
|
620
|
-
4. If everyone is converging too fast, that's a red flag — find the hidden complexity
|
|
621
|
-
|
|
622
|
-
Don't just "add nuance" or "build on" — find something to genuinely challenge.
|
|
623
|
-
If you can't find real disagreement, say why the consensus might be groupthink."""
|
|
624
|
-
|
|
625
803
|
first_speaker_with_blind = """You are {name}, speaking first in Round {round_num} of a council deliberation.
|
|
626
804
|
|
|
627
805
|
You've seen everyone's BLIND CLAIMS (their independent initial positions). Now engage:
|
|
@@ -652,7 +830,22 @@ Previous speakers this round: {previous_speakers}
|
|
|
652
830
|
Be direct. Challenge weak arguments. Don't be sycophantic.
|
|
653
831
|
Prioritize PRACTICAL, ACTIONABLE advice over academic observations. Avoid jargon."""
|
|
654
832
|
|
|
833
|
+
challenger_addition = """
|
|
834
|
+
|
|
835
|
+
SPECIAL ROLE: You are the CHALLENGER for this round. Your job is to argue the CONTRARIAN position.
|
|
836
|
+
|
|
837
|
+
REQUIREMENTS:
|
|
838
|
+
1. You MUST explicitly DISAGREE with at least one major point from the other speakers
|
|
839
|
+
2. Identify the weakest assumption in the emerging consensus and attack it
|
|
840
|
+
3. Name ONE specific thing that would make the consensus WRONG
|
|
841
|
+
4. You CANNOT use phrases like "building on", "adding nuance", or "I largely agree"
|
|
842
|
+
5. If everyone is converging too fast, that's a red flag — find the hidden complexity
|
|
843
|
+
|
|
844
|
+
Even if you ultimately agree with the direction, you MUST articulate the strongest possible counter-argument.
|
|
845
|
+
If you can't find real disagreement, explain why the consensus might be groupthink."""
|
|
846
|
+
|
|
655
847
|
for round_num in range(rounds):
|
|
848
|
+
current_round = round_num + 1
|
|
656
849
|
round_speakers = []
|
|
657
850
|
for idx, (name, model, fallback) in enumerate(council_config):
|
|
658
851
|
dname = display_names[name]
|
|
@@ -673,6 +866,13 @@ Prioritize PRACTICAL, ACTIONABLE advice over academic observations. Avoid jargon
|
|
|
673
866
|
previous_speakers=previous
|
|
674
867
|
)
|
|
675
868
|
|
|
869
|
+
if domain_context:
|
|
870
|
+
system_prompt += f"""
|
|
871
|
+
|
|
872
|
+
DOMAIN CONTEXT: {domain_context}
|
|
873
|
+
|
|
874
|
+
Apply this regulatory domain context to your analysis."""
|
|
875
|
+
|
|
676
876
|
if social_mode:
|
|
677
877
|
system_prompt += social_constraint
|
|
678
878
|
|
|
@@ -684,8 +884,16 @@ IMPORTANT CONTEXT about the person asking:
|
|
|
684
884
|
|
|
685
885
|
Factor this into your advice — don't just give strategically optimal answers, consider what fits THIS person."""
|
|
686
886
|
|
|
687
|
-
|
|
688
|
-
|
|
887
|
+
# Calculate rotating challenger for this round
|
|
888
|
+
if challenger_idx is not None:
|
|
889
|
+
# Explicit --challenger sets starting point, then rotates
|
|
890
|
+
current_challenger = (challenger_idx + round_num) % len(council_config)
|
|
891
|
+
else:
|
|
892
|
+
# Default: start with the first council member (index 0), rotate through council
|
|
893
|
+
current_challenger = round_num % len(council_config)
|
|
894
|
+
|
|
895
|
+
if idx == current_challenger:
|
|
896
|
+
system_prompt += challenger_addition
|
|
689
897
|
|
|
690
898
|
user_content = f"Question for the council:\n\n{question}"
|
|
691
899
|
if blind_context:
|
|
@@ -705,9 +913,10 @@ Factor this into your advice — don't just give strategically optimal answers,
|
|
|
705
913
|
})
|
|
706
914
|
|
|
707
915
|
model_name = model.split("/")[-1]
|
|
916
|
+
challenger_indicator = " (challenger)" if idx == current_challenger else ""
|
|
708
917
|
|
|
709
918
|
if verbose:
|
|
710
|
-
print(f"### {model_name}")
|
|
919
|
+
print(f"### {model_name}{challenger_indicator}")
|
|
711
920
|
if is_thinking_model(model):
|
|
712
921
|
print("(thinking...)", flush=True)
|
|
713
922
|
|
|
@@ -743,9 +952,10 @@ Factor this into your advice — don't just give strategically optimal answers,
|
|
|
743
952
|
if verbose:
|
|
744
953
|
print()
|
|
745
954
|
|
|
746
|
-
output_parts.append(f"### {model_name}\n{response}")
|
|
955
|
+
output_parts.append(f"### {model_name}{challenger_indicator}\n{response}")
|
|
747
956
|
|
|
748
|
-
|
|
957
|
+
# current_challenger already calculated in the speaker loop above
|
|
958
|
+
converged, reason = detect_consensus(conversation, council_config, current_challenger)
|
|
749
959
|
if converged:
|
|
750
960
|
if verbose:
|
|
751
961
|
print(f">>> CONSENSUS DETECTED ({reason}) - proceeding to judge\n")
|
|
@@ -755,6 +965,10 @@ Factor this into your advice — don't just give strategically optimal answers,
|
|
|
755
965
|
context_hint = ""
|
|
756
966
|
if context:
|
|
757
967
|
context_hint = f"\n\nContext about this question: {context}\nConsider this context when weighing perspectives and forming recommendations."
|
|
968
|
+
|
|
969
|
+
domain_hint = ""
|
|
970
|
+
if domain_context:
|
|
971
|
+
domain_hint = f"\n\nDOMAIN CONTEXT: {domain}\nConsider this regulatory domain context when weighing perspectives and forming recommendations."
|
|
758
972
|
|
|
759
973
|
social_judge_section = ""
|
|
760
974
|
if social_mode:
|
|
@@ -763,14 +977,17 @@ Factor this into your advice — don't just give strategically optimal answers,
|
|
|
763
977
|
## Social Calibration Check
|
|
764
978
|
[Would the recommendation feel natural in conversation? Is it something you'd actually say, or does it sound like strategic over-optimization? If the council produced something too formal/structured, suggest a simpler, more human alternative.]"""
|
|
765
979
|
|
|
766
|
-
judge_system = f"""You are the Judge, responsible for synthesizing the council's deliberation.{context_hint}
|
|
980
|
+
judge_system = f"""You are the Judge (Claude), responsible for synthesizing the council's deliberation.{context_hint}{domain_hint}
|
|
981
|
+
|
|
982
|
+
You did NOT participate in the deliberation — you're seeing it fresh. This gives you objectivity.
|
|
767
983
|
|
|
768
984
|
After the council members have shared their perspectives, you:
|
|
769
985
|
1. Identify points of AGREEMENT across all members
|
|
770
986
|
2. Identify points of DISAGREEMENT and explain the different views
|
|
771
|
-
3.
|
|
772
|
-
4.
|
|
773
|
-
|
|
987
|
+
3. Add YOUR OWN perspective — what did the council miss? What's your independent take?
|
|
988
|
+
4. Provide a SYNTHESIS that integrates the council's views with your own
|
|
989
|
+
5. Give a final RECOMMENDATION based on everything
|
|
990
|
+
{"6. SOCIAL CALIBRATION: Check if the recommendation would feel natural in actual conversation" if social_mode else ""}
|
|
774
991
|
|
|
775
992
|
Format your response as:
|
|
776
993
|
|
|
@@ -780,13 +997,16 @@ Format your response as:
|
|
|
780
997
|
## Points of Disagreement
|
|
781
998
|
[Where views differ and why]
|
|
782
999
|
|
|
1000
|
+
## Judge's Own Take
|
|
1001
|
+
[Your independent perspective. What did the council miss or underweight? What would YOU add to this discussion?]
|
|
1002
|
+
|
|
783
1003
|
## Synthesis
|
|
784
|
-
[The integrated perspective]
|
|
1004
|
+
[The integrated perspective, combining council views with your own]
|
|
785
1005
|
|
|
786
1006
|
## Recommendation
|
|
787
|
-
[Your final recommendation
|
|
1007
|
+
[Your final recommendation]
|
|
788
1008
|
{social_judge_section}
|
|
789
|
-
Be balanced and fair. Acknowledge minority views.
|
|
1009
|
+
Be balanced and fair. Acknowledge minority views. But don't be afraid to have your own opinion — you're the judge, not just a summarizer.{" For social contexts, prioritize natural/human output over strategic optimization." if social_mode else ""}
|
|
790
1010
|
|
|
791
1011
|
IMPORTANT: In your Recommendation, clearly distinguish:
|
|
792
1012
|
- **Do Now** — practical actions the user can take immediately
|
|
@@ -815,6 +1035,21 @@ Don't recommend building infrastructure for problems that don't exist yet."""
|
|
|
815
1035
|
|
|
816
1036
|
output_parts.append(f"### Judge ({judge_model_name})\n{judge_response}")
|
|
817
1037
|
|
|
1038
|
+
if format != 'prose':
|
|
1039
|
+
structured = extract_structured_summary(
|
|
1040
|
+
judge_response=judge_response,
|
|
1041
|
+
question=question,
|
|
1042
|
+
models_used=[name for name, _, _ in council_config],
|
|
1043
|
+
rounds=current_round if rounds > 0 else 1,
|
|
1044
|
+
duration=time.time() - start_time,
|
|
1045
|
+
cost=0.85,
|
|
1046
|
+
)
|
|
1047
|
+
|
|
1048
|
+
if format == 'json':
|
|
1049
|
+
output_parts.append('\n\n---\n\n' + json.dumps(structured, indent=2, ensure_ascii=False))
|
|
1050
|
+
else:
|
|
1051
|
+
output_parts.append('\n\n---\n\n' + yaml.dump(structured, allow_unicode=True, default_flow_style=False))
|
|
1052
|
+
|
|
818
1053
|
if anonymous:
|
|
819
1054
|
final_output = "\n\n".join(output_parts)
|
|
820
1055
|
for name, model, _ in council_config:
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from pydantic import BaseModel
|
|
2
|
+
from typing import Literal
|
|
3
|
+
|
|
4
|
+
class ActionItem(BaseModel):
    """A single follow-up action together with its priority label."""

    # Free-text description of the action.
    action: str
    # One of "high", "medium" or "low"; defaults to "medium" when omitted.
    priority: Literal["high", "medium", "low"] = "medium"
|
|
7
|
+
|
|
8
|
+
class Dissent(BaseModel):
    """A dissenting concern attributed to a model."""

    # Name of the model the concern is attributed to.
    model: str
    # Free-text statement of the concern.
    concern: str
|
|
11
|
+
|
|
12
|
+
class CouncilMeta(BaseModel):
    """Run metadata attached to a council output."""

    # Timestamp kept as a string (presumably ISO 8601 — confirm against the producer).
    timestamp: str
    # Names of the models that took part.
    models_used: list[str]
    # Number of deliberation rounds.
    rounds: int
    # Duration of the run in seconds.
    duration_seconds: float
    # Estimated cost of the run in US dollars.
    estimated_cost_usd: float
|
|
18
|
+
|
|
19
|
+
class CouncilOutput(BaseModel):
|
|
20
|
+
schema_version: str = "1.0"
|
|
21
|
+
question: str
|
|
22
|
+
decision: str
|
|
23
|
+
confidence: Literal["low", "medium", "high"]
|
|
24
|
+
reasoning_summary: str
|
|
25
|
+
dissents: list[Dissent]
|
|
26
|
+
action_items: list[ActionItem]
|
|
27
|
+
meta: CouncilMeta
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: frontier-council
|
|
3
|
-
Version: 0.1.2
|
|
4
|
-
Summary: Multi-model deliberation for important decisions.
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Multi-model deliberation for important decisions. 4 frontier LLMs debate with rotating challenger, then Claude judges.
|
|
5
5
|
Project-URL: Homepage, https://github.com/terry-li-hm/frontier-council
|
|
6
6
|
Project-URL: Repository, https://github.com/terry-li-hm/frontier-council
|
|
7
7
|
Project-URL: Issues, https://github.com/terry-li-hm/frontier-council/issues
|
|
@@ -19,22 +19,25 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
19
19
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
20
|
Requires-Python: >=3.11
|
|
21
21
|
Requires-Dist: httpx>=0.25.0
|
|
22
|
+
Requires-Dist: pydantic>=2.0
|
|
23
|
+
Requires-Dist: pyyaml>=6.0
|
|
22
24
|
Description-Content-Type: text/markdown
|
|
23
25
|
|
|
24
26
|
# Frontier Council
|
|
25
27
|
|
|
26
|
-
Multi-model deliberation for important decisions.
|
|
28
|
+
Multi-model deliberation for important decisions. 4 frontier LLMs debate a question, then Claude judges and synthesizes.
|
|
27
29
|
|
|
28
|
-
Inspired by [Andrej Karpathy's LLM Council](https://github.com/karpathy/llm-council), with added blind phase (anti-anchoring), explicit engagement requirements,
|
|
30
|
+
Inspired by [Andrej Karpathy's LLM Council](https://github.com/karpathy/llm-council), with added blind phase (anti-anchoring), explicit engagement requirements, rotating challenger role, and social calibration mode.
|
|
29
31
|
|
|
30
32
|
## Models
|
|
31
33
|
|
|
32
|
-
|
|
34
|
+
**Council (deliberators):**
|
|
33
35
|
- GPT (gpt-5.2-pro)
|
|
34
36
|
- Gemini (gemini-3-pro-preview)
|
|
35
37
|
- Grok (grok-4)
|
|
36
38
|
- Kimi (kimi-k2.5)
|
|
37
|
-
|
|
39
|
+
|
|
40
|
+
**Judge:** Claude Opus 4.5 (synthesizes + adds own perspective)
|
|
38
41
|
|
|
39
42
|
## Installation
|
|
40
43
|
|
|
@@ -99,7 +102,9 @@ All sessions are auto-saved to `~/.frontier-council/sessions/` for later review.
|
|
|
99
102
|
| `--share` | Upload transcript to secret GitHub Gist |
|
|
100
103
|
| `--social` | Enable social calibration mode (auto-detected for interview/networking) |
|
|
101
104
|
| `--persona TEXT` | Context about the person asking |
|
|
102
|
-
| `--
|
|
105
|
+
| `--challenger MODEL` | Which model starts as challenger (gpt/gemini/grok/kimi). Rotates each round. |
|
|
106
|
+
| `--domain DOMAIN` | Regulatory domain context (banking, healthcare, eu, fintech, bio) |
|
|
107
|
+
| `--followup` | Enable interactive drill-down after judge synthesis |
|
|
103
108
|
| `--quiet` | Suppress progress output |
|
|
104
109
|
| `--sessions` | List recent saved sessions |
|
|
105
110
|
| `--no-save` | Don't auto-save transcript to ~/.frontier-council/sessions/ |
|
|
@@ -114,9 +119,15 @@ All sessions are auto-saved to `~/.frontier-council/sessions/` for later review.
|
|
|
114
119
|
**Deliberation Protocol:**
|
|
115
120
|
1. All models see everyone's blind claims, then deliberate
|
|
116
121
|
2. Each model MUST explicitly AGREE, DISAGREE, or BUILD ON previous speakers by name
|
|
117
|
-
3. After each round, the system checks for consensus (4
|
|
122
|
+
3. After each round, the system checks for consensus (3/4 non-challengers agreeing triggers early exit)
|
|
118
123
|
4. Judge synthesizes the full deliberation
|
|
119
124
|
|
|
125
|
+
**Rotating Challenger:**
|
|
126
|
+
- One model each round is assigned the "challenger" role
|
|
127
|
+
- The challenger MUST argue the contrarian position and identify weaknesses in emerging consensus
|
|
128
|
+
- Role rotates each round (GPT R1 → Gemini R2 → Grok R3 → Kimi R4...) to ensure sustained disagreement
|
|
129
|
+
- Challenger is excluded from consensus detection (forced disagreement shouldn't block early exit)
|
|
130
|
+
|
|
120
131
|
**Anonymous Deliberation:**
|
|
121
132
|
- Models see each other as "Speaker 1", "Speaker 2", etc. during deliberation
|
|
122
133
|
- Prevents models from playing favorites based on vendor reputation
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
frontier_council/__init__.py,sha256=lJJIdVKStG2zLYlKMtvIpxK3S2D0NsWK_ZIKc86y2VM,357
|
|
2
|
+
frontier_council/cli.py,sha256=eN3EdJGWEkQ4OEmWq2s1IQo3-Q8kPUrQMgcPMRkCDa8,13796
|
|
3
|
+
frontier_council/council.py,sha256=ntu4D0NQizglfxGxDvuXZ7hjRXWqDNgzaIVZci8cmzc,39617
|
|
4
|
+
frontier_council/schema.py,sha256=j4436pYP-PtBorQZve6dpA7JqYlkTIlkxMyufSmVra0,633
|
|
5
|
+
frontier_council-0.2.0.dist-info/METADATA,sha256=jAZRJ0fpGxkM_zsmTMFLe_WRRwWulr8PVIm5xwsFNy4,5921
|
|
6
|
+
frontier_council-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
7
|
+
frontier_council-0.2.0.dist-info/entry_points.txt,sha256=I3xjPK-nupfQz5PANVXUnXjuxlP-4-mykkA3wXhFOGY,63
|
|
8
|
+
frontier_council-0.2.0.dist-info/licenses/LICENSE,sha256=8qmwox7khp-AakNVvL-Ga25eYbsCtLx8RyXM4zKkX0w,1065
|
|
9
|
+
frontier_council-0.2.0.dist-info/RECORD,,
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
frontier_council/__init__.py,sha256=VAITtl8rVW6wSN4iN86TVOmB2D2nrGu8ZI9ezDnOL3I,357
|
|
2
|
-
frontier_council/cli.py,sha256=9ZJgdFXHSgFDACf30veWb3uHruxlueVQY-N8hHSJL9M,9772
|
|
3
|
-
frontier_council/council.py,sha256=u2ir34dNostBOhXUi1R0wFEfBIEgiRX8thiS5lRFnnU,30226
|
|
4
|
-
frontier_council-0.1.2.dist-info/METADATA,sha256=E9iEMkdVEBtmA2NyprKIWipDAnR0gHWVGbIonlmVYrc,5229
|
|
5
|
-
frontier_council-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
6
|
-
frontier_council-0.1.2.dist-info/entry_points.txt,sha256=I3xjPK-nupfQz5PANVXUnXjuxlP-4-mykkA3wXhFOGY,63
|
|
7
|
-
frontier_council-0.1.2.dist-info/licenses/LICENSE,sha256=8qmwox7khp-AakNVvL-Ga25eYbsCtLx8RyXM4zKkX0w,1065
|
|
8
|
-
frontier_council-0.1.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|