frontier-council 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  """Frontier Council - Multi-model deliberation for important decisions."""
2
2
 
3
- __version__ = "0.1.2"
3
+ __version__ = "0.2.0"
4
4
 
5
5
  from .council import (
6
6
  run_council,
frontier_council/cli.py CHANGED
@@ -29,6 +29,8 @@ from .council import (
29
29
  COUNCIL,
30
30
  detect_social_context,
31
31
  run_council,
32
+ DOMAIN_CONTEXTS,
33
+ run_followup_discussion,
32
34
  )
33
35
 
34
36
 
@@ -42,9 +44,10 @@ Examples:
42
44
  frontier-council "What questions should I ask?" --social
43
45
  frontier-council "Career decision" --persona "builder who hates process work"
44
46
  frontier-council "Architecture choice" --rounds 3 --output transcript.md
47
+ frontier-council "Decision" --domain banking --followup --output counsel.md
45
48
  """,
46
49
  )
47
- parser.add_argument("question", help="The question for the council to deliberate")
50
+ parser.add_argument("question", nargs="?", help="The question for the council to deliberate")
48
51
  parser.add_argument(
49
52
  "--rounds",
50
53
  type=int,
@@ -74,6 +77,12 @@ Examples:
74
77
  "--context", "-c",
75
78
  help="Context hint for the judge (e.g., 'architecture decision', 'ethics question')",
76
79
  )
80
+ parser.add_argument(
81
+ "--format", "-f",
82
+ choices=["json", "yaml", "prose"],
83
+ default="prose",
84
+ help="Output format: json (machine-parseable), yaml (structured), prose (default)",
85
+ )
77
86
  parser.add_argument(
78
87
  "--share",
79
88
  action="store_true",
@@ -92,7 +101,20 @@ Examples:
92
101
  "--advocate",
93
102
  type=int,
94
103
  choices=[1, 2, 3, 4],
95
- help="Which speaker (1-5) should be devil's advocate (default: random)",
104
+ help="DEPRECATED: Use --challenger instead. Maps to --challenger by model name.",
105
+ )
106
+ parser.add_argument(
107
+ "--domain",
108
+ help="Regulatory domain context (banking, healthcare, eu, fintech, bio)",
109
+ )
110
+ parser.add_argument(
111
+ "--challenger",
112
+ help="Which model should start as the contrarian challenger (gpt, gemini, grok, kimi). Default: gpt",
113
+ )
114
+ parser.add_argument(
115
+ "--followup",
116
+ action="store_true",
117
+ help="Enable followup mode to drill into specific points after judge synthesis",
96
118
  )
97
119
  parser.add_argument(
98
120
  "--no-save",
@@ -121,12 +143,44 @@ Examples:
121
143
  print(f"\n ... and {len(sessions) - 20} more")
122
144
  sys.exit(0)
123
145
 
146
+ # Require question for normal operation
147
+ if not args.question:
148
+ parser.error("the following arguments are required: question")
149
+
124
150
  # Auto-detect social context if not explicitly set
125
151
  social_mode = args.social or detect_social_context(args.question)
126
152
  if social_mode and not args.social and not args.quiet:
127
153
  print("(Auto-detected social context - enabling social calibration mode)")
128
154
  print()
129
155
 
156
+ # Validate and resolve domain
157
+ domain_context = None
158
+ if args.domain:
159
+ if args.domain.lower() not in DOMAIN_CONTEXTS:
160
+ print(f"Error: Unknown domain '{args.domain}'. Valid domains: {', '.join(DOMAIN_CONTEXTS.keys())}", file=sys.stderr)
161
+ sys.exit(1)
162
+ domain_context = args.domain.lower()
163
+
164
+ # Resolve challenger model
165
+ challenger_idx = None
166
+ if args.challenger:
167
+ challenger_lower = args.challenger.lower()
168
+ model_name_map = {n.lower(): i for i, (n, _, _) in enumerate(COUNCIL)}
169
+ if challenger_lower not in model_name_map:
170
+ print(f"Error: Unknown model '{args.challenger}'. Valid models: {', '.join(n for n, _, _ in COUNCIL)}", file=sys.stderr)
171
+ sys.exit(1)
172
+ challenger_idx = model_name_map[challenger_lower]
173
+ elif args.domain:
174
+ # Default challenger: GPT (index 0) when domain is set
175
+ # Reasoning: Grok is naturally contrarian anyway, so assigning GPT as challenger
176
+ # gives you two sources of pushback
177
+ challenger_idx = 0
178
+
179
+ if not args.quiet and challenger_idx is not None:
180
+ challenger_name = COUNCIL[challenger_idx][0]
181
+ print(f"(Contrarian challenger: {challenger_name})")
182
+ print()
183
+
130
184
  # Get API keys
131
185
  api_key = os.environ.get("OPENROUTER_API_KEY")
132
186
  if not api_key:
@@ -155,14 +209,28 @@ Examples:
155
209
  print()
156
210
 
157
211
  try:
158
- advocate_idx = (args.advocate - 1) if args.advocate else random.randint(0, len(COUNCIL) - 1)
212
+ # Handle deprecated --advocate flag
213
+ if args.advocate:
214
+ print("Warning: --advocate is deprecated. Use --challenger instead.", file=sys.stderr)
215
+ model_names = [n for n, _, _ in COUNCIL]
216
+ mapped_model = model_names[args.advocate - 1].lower()
217
+ print(f" Mapping --advocate {args.advocate} to --challenger {mapped_model}", file=sys.stderr)
218
+ if not args.challenger:
219
+ args.challenger = mapped_model
220
+ # Re-resolve challenger_idx after mapping
221
+ challenger_lower = args.challenger.lower()
222
+ model_name_map = {n.lower(): i for i, (n, _, _) in enumerate(COUNCIL)}
223
+ challenger_idx = model_name_map.get(challenger_lower, 0)
159
224
 
160
225
  if not args.quiet and args.persona:
161
226
  print(f"(Persona context: {args.persona})")
162
227
  print()
228
+
229
+ # Show starting challenger (now rotates each round)
163
230
  if not args.quiet:
164
- advocate_name = COUNCIL[advocate_idx][0]
165
- print(f"(Devil's advocate: {advocate_name})")
231
+ starting_challenger_idx = challenger_idx if challenger_idx is not None else 0
232
+ starting_challenger_name = COUNCIL[starting_challenger_idx][0]
233
+ print(f"(Starting challenger: {starting_challenger_name}, rotates each round)")
166
234
  print()
167
235
 
168
236
  transcript, failed_models = run_council(
@@ -178,9 +246,32 @@ Examples:
178
246
  context=args.context,
179
247
  social_mode=social_mode,
180
248
  persona=args.persona,
181
- advocate_idx=advocate_idx,
249
+ domain=domain_context,
250
+ challenger_idx=challenger_idx,
251
+ format=args.format,
182
252
  )
183
253
 
254
+ # Followup mode
255
+ followup_transcript = ""
256
+ if args.followup and not args.quiet:
257
+ print("\n" + "=" * 60)
258
+ print("Enter topic to explore further (or 'done'): ", end="", flush=True)
259
+ topic = input().strip()
260
+
261
+ if topic and topic.lower() != "done":
262
+ domain_ctxt = DOMAIN_CONTEXTS.get(domain_context, "") if domain_context else ""
263
+ followup_transcript = run_followup_discussion(
264
+ question=args.question,
265
+ topic=topic,
266
+ council_config=COUNCIL,
267
+ api_key=api_key,
268
+ domain_context=domain_ctxt,
269
+ social_mode=social_mode,
270
+ persona=args.persona,
271
+ verbose=not args.quiet,
272
+ )
273
+ transcript += "\n\n" + followup_transcript
274
+
184
275
  # Print failure summary
185
276
  if failed_models and not args.quiet:
186
277
  print()
@@ -4,6 +4,9 @@ import asyncio
4
4
  import httpx
5
5
  import json
6
6
  import re
7
+ import time
8
+ import yaml
9
+ from datetime import datetime
7
10
  from pathlib import Path
8
11
 
9
12
  OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
@@ -14,15 +17,24 @@ MOONSHOT_URL = "https://api.moonshot.cn/v1/chat/completions"
14
17
  # Format: (name, openrouter_model, fallback) - fallback is (provider, model) or None
15
18
  # Providers: "google" = AI Studio, "moonshot" = Moonshot API
16
19
  COUNCIL = [
17
- ("Claude", "anthropic/claude-opus-4.5", None),
18
20
  ("GPT", "openai/gpt-5.2-pro", None),
19
21
  ("Gemini", "google/gemini-3-pro-preview", ("google", "gemini-2.5-pro")),
20
22
  ("Grok", "x-ai/grok-4", None),
21
23
  ("Kimi", "moonshotai/kimi-k2.5", ("moonshot", "kimi-k2.5")),
22
24
  ]
23
25
 
26
+ # Claude is judge-only (not in council) to avoid conflict of interest
24
27
  JUDGE_MODEL = "anthropic/claude-opus-4.5"
25
28
 
29
+ # Domain-specific regulatory contexts
30
+ DOMAIN_CONTEXTS = {
31
+ "banking": "You are operating in a banking/financial services regulatory environment. Consider: HKMA/MAS/FCA requirements, Model Risk Management (MRM) expectations, audit trail needs, BCBS 239 governance, explainability requirements, documentation standards, and regulatory scrutiny levels.",
32
+ "healthcare": "You are operating in a healthcare regulatory environment. Consider: HIPAA constraints on PHI handling, FDA requirements for medical devices, clinical validation expectations, interoperability standards (FHIR), GxP compliance, and patient safety requirements.",
33
+ "eu": "You are operating in the EU regulatory environment. Consider: GDPR data protection requirements, EU AI Act risk categorization, Digital Markets Act compliance, cross-border data transfer rules (Schrems II), and EU data localization expectations.",
34
+ "fintech": "You are operating in a fintech regulatory environment. Consider: KYC/AML requirements, PSD2 banking regulations, e-money licensing expectations, payment services directive compliance, and financial consumer protection rules.",
35
+ "bio": "You are operating in a biotech/pharma regulatory environment. Consider: FDA/EMA drug approval processes, GMP manufacturing requirements, clinical trial design expectations, pharmacovigilance obligations, and post-market surveillance requirements.",
36
+ }
37
+
26
38
  # Keywords that suggest social/conversational context (auto-detect)
27
39
  SOCIAL_KEYWORDS = [
28
40
  "interview", "ask him", "ask her", "ask them", "question to ask",
@@ -31,9 +43,12 @@ SOCIAL_KEYWORDS = [
31
43
  "what should i say", "how should i respond", "conversation",
32
44
  ]
33
45
 
34
- # Thinking models don't stream well - use non-streaming for these
46
+ # Thinking models - use non-streaming, higher tokens, longer timeout
35
47
  THINKING_MODEL_SUFFIXES = {
48
+ "claude-opus-4.5",
49
+ "gpt-5.2-pro",
36
50
  "gemini-3-pro-preview",
51
+ "grok-4",
37
52
  "kimi-k2.5",
38
53
  "deepseek-r1",
39
54
  "o1-preview", "o1-mini", "o1",
@@ -440,6 +455,7 @@ async def run_blind_phase_parallel(
440
455
  moonshot_api_key: str | None = None,
441
456
  verbose: bool = True,
442
457
  persona: str | None = None,
458
+ domain_context: str = "",
443
459
  ) -> list[tuple[str, str, str]]:
444
460
  """Parallel blind first-pass: all models stake claims simultaneously."""
445
461
  blind_system = """You are participating in the BLIND PHASE of a council deliberation.
@@ -454,6 +470,13 @@ Provide a CLAIM SKETCH (not a full response):
454
470
 
455
471
  Keep it concise (~100 words). The full deliberation comes later."""
456
472
 
473
+ if domain_context:
474
+ blind_system += f"""
475
+
476
+ DOMAIN CONTEXT: {domain_context}
477
+
478
+ Apply this regulatory domain context to your analysis."""
479
+
457
480
  if persona:
458
481
  blind_system += f"""
459
482
 
@@ -519,28 +542,188 @@ def sanitize_speaker_content(content: str) -> str:
519
542
  return sanitized
520
543
 
521
544
 
522
- def detect_consensus(conversation: list[tuple[str, str]], council_size: int) -> tuple[bool, str]:
523
- """Detect if council has converged. Returns (converged, reason)."""
545
+ def detect_consensus(
546
+ conversation: list[tuple[str, str]],
547
+ council_config: list[tuple[str, str, tuple[str, str] | None]],
548
+ current_challenger_idx: int | None = None,
549
+ ) -> tuple[bool, str]:
550
+ """Detect if council has converged. Returns (converged, reason).
551
+
552
+ Excludes the current challenger from consensus count since they're
553
+ structurally incentivized to disagree.
554
+ """
555
+ council_size = len(council_config)
556
+
524
557
  if len(conversation) < council_size:
525
558
  return False, "insufficient responses"
526
559
 
527
- recent = [text for _, text in conversation[-council_size:]]
560
+ recent = conversation[-council_size:]
528
561
 
529
- consensus_count = sum(1 for text in recent if "CONSENSUS:" in text.upper())
530
- if consensus_count >= council_size - 1:
562
+ # Exclude challenger from consensus count
563
+ if current_challenger_idx is not None:
564
+ challenger_name = council_config[current_challenger_idx][0]
565
+ recent = [(name, text) for name, text in recent if name != challenger_name]
566
+
567
+ effective_size = len(recent)
568
+ if effective_size == 0:
569
+ return False, "no non-challenger responses"
570
+
571
+ threshold = effective_size - 1 # Need all-but-one non-challengers to agree
572
+
573
+ consensus_count = sum(1 for _, text in recent if "CONSENSUS:" in text.upper())
574
+ if consensus_count >= threshold:
531
575
  return True, "explicit consensus signals"
532
576
 
533
577
  agreement_phrases = ["i agree with", "i concur", "we all agree", "consensus emerging"]
534
578
  agreement_count = sum(
535
- 1 for text in recent
579
+ 1 for _, text in recent
536
580
  if any(phrase in text.lower() for phrase in agreement_phrases)
537
581
  )
538
- if agreement_count >= council_size - 1:
582
+ if agreement_count >= threshold:
539
583
  return True, "agreement language detected"
540
584
 
541
585
  return False, "no consensus"
542
586
 
543
587
 
588
+ def extract_structured_summary(
589
+ judge_response: str,
590
+ question: str,
591
+ models_used: list[str],
592
+ rounds: int,
593
+ duration: float,
594
+ cost: float,
595
+ ) -> dict:
596
+ lines = judge_response.split('\n')
597
+
598
+ decision = ""
599
+ confidence = "medium"
600
+ reasoning = ""
601
+ dissents = []
602
+ action_items = []
603
+
604
+ for i, line in enumerate(lines):
605
+ line_lower = line.lower()
606
+ if 'recommend' in line_lower or 'decision:' in line_lower:
607
+ decision = line.strip()
608
+ elif 'dissent' in line_lower or 'disagree' in line_lower:
609
+ dissents.append({"model": "Unknown", "concern": line.strip()})
610
+ elif 'action' in line_lower or 'next step' in line_lower:
611
+ action_items.append({"action": line.strip(), "priority": "medium"})
612
+
613
+ if not decision:
614
+ for line in lines:
615
+ if len(line.strip()) > 20:
616
+ decision = line.strip()
617
+ break
618
+
619
+ return {
620
+ "schema_version": "1.0",
621
+ "question": question,
622
+ "decision": decision[:500] if decision else "See transcript for details",
623
+ "confidence": confidence,
624
+ "reasoning_summary": judge_response[:1000],
625
+ "dissents": dissents[:5],
626
+ "action_items": action_items[:5],
627
+ "meta": {
628
+ "timestamp": datetime.now().isoformat(),
629
+ "models_used": models_used,
630
+ "rounds": rounds,
631
+ "duration_seconds": duration,
632
+ "estimated_cost_usd": cost
633
+ }
634
+ }
635
+
636
+
637
+ def run_followup_discussion(
638
+ question: str,
639
+ topic: str,
640
+ council_config: list[tuple[str, str, tuple[str, str] | None]],
641
+ api_key: str,
642
+ domain_context: str = "",
643
+ social_mode: bool = False,
644
+ persona: str | None = None,
645
+ verbose: bool = True,
646
+ ) -> str:
647
+ """Run a focused followup discussion on a specific topic with 2 models. Returns the followup transcript."""
648
+ # Use the first two council models (GPT and Gemini) for the focused followup
649
+ followup_models = council_config[:2] # GPT and Gemini
650
+
651
+ followup_transcript_parts = []
652
+
653
+ if verbose:
654
+ print()
655
+ print("=" * 60)
656
+ print(f"FOLLOWUP: {topic}")
657
+ print("=" * 60)
658
+ print()
659
+
660
+ social_constraint = """
661
+
662
+ SOCIAL CALIBRATION: This is a social/conversational context (interview, networking, outreach).
663
+ Your output should feel natural in conversation - something you'd actually say over coffee.
664
+ Avoid structured, multi-part diagnostic questions that sound like interrogation.
665
+ Simple and human beats strategic and comprehensive. Optimize for being relatable, not thorough.""" if social_mode else ""
666
+
667
+ followup_parts = [
668
+ "You are participating in a FOCUSED FOLLOWUP discussion on a specific topic.",
669
+ "",
670
+ f"The main council has concluded, and we're now drilling down into:",
671
+ f"TOPIC: {topic}",
672
+ "",
673
+ "Keep your response focused on this specific topic. Don't rehash the full council deliberation.",
674
+ "Be concise and practical.",
675
+ "",
676
+ ]
677
+
678
+ if social_constraint:
679
+ followup_parts.append(social_constraint.strip())
680
+
681
+ if persona:
682
+ followup_parts.extend([
683
+ "",
684
+ "IMPORTANT CONTEXT about the person asking:",
685
+ persona,
686
+ "",
687
+ "Factor this into your advice — don't just give strategically optimal answers, consider what fits THIS person.",
688
+ ])
689
+
690
+ if domain_context:
691
+ followup_parts.extend([
692
+ "",
693
+ f"DOMAIN CONTEXT: {domain_context}",
694
+ "",
695
+ "Apply this regulatory domain context to your analysis.",
696
+ ])
697
+
698
+ followup_system = "\n".join(followup_parts)
699
+
700
+ followup_transcript_parts.append(f"### Followup Discussion: {topic}\n")
701
+
702
+ for i, (name, model, fallback) in enumerate(followup_models):
703
+ messages = [
704
+ {"role": "system", "content": followup_system},
705
+ {"role": "user", "content": f"Original Question:\n\n{question}\n\nFocus your response on: {topic}"},
706
+ ]
707
+
708
+ if verbose:
709
+ print(f"### {name}")
710
+
711
+ response = query_model(api_key, model, messages, stream=verbose)
712
+
713
+ if verbose:
714
+ print()
715
+
716
+ followup_transcript_parts.append(f"### {name}\n{response}\n")
717
+
718
+ if verbose:
719
+ print("=" * 60)
720
+ print("FOLLOWUP COMPLETE")
721
+ print("=" * 60)
722
+ print()
723
+
724
+ return "\n\n".join(followup_transcript_parts)
725
+
726
+
544
727
  def run_council(
545
728
  question: str,
546
729
  council_config: list[tuple[str, str, tuple[str, str] | None]],
@@ -554,17 +737,22 @@ def run_council(
554
737
  context: str | None = None,
555
738
  social_mode: bool = False,
556
739
  persona: str | None = None,
557
- advocate_idx: int | None = None,
740
+ domain: str | None = None,
741
+ challenger_idx: int | None = None, # Starting challenger index, rotates each round
742
+ format: str = "prose",
558
743
  ) -> tuple[str, list[str]]:
559
744
  """Run the council deliberation. Returns (transcript, failed_models)."""
560
745
 
746
+ start_time = time.time()
747
+
748
+ domain_context = DOMAIN_CONTEXTS.get(domain, "") if domain else ""
561
749
  council_names = [name for name, _, _ in council_config]
562
750
  blind_claims = []
563
751
  failed_models = []
564
752
 
565
753
  if blind:
566
754
  blind_claims = asyncio.run(run_blind_phase_parallel(
567
- question, council_config, api_key, google_api_key, moonshot_api_key, verbose, persona
755
+ question, council_config, api_key, google_api_key, moonshot_api_key, verbose, persona, domain_context
568
756
  ))
569
757
  for name, model_name, claims in blind_claims:
570
758
  if claims.startswith("["):
@@ -580,6 +768,8 @@ def run_council(
580
768
  if anonymous:
581
769
  print("(Models see each other as Speaker 1, 2, etc. to prevent bias)")
582
770
  print(f"Rounds: {rounds}")
771
+ if domain:
772
+ print(f"Domain context: {domain}")
583
773
  print(f"Question: {question[:100]}{'...' if len(question) > 100 else ''}")
584
774
  print()
585
775
  print("=" * 60)
@@ -589,6 +779,7 @@ def run_council(
589
779
 
590
780
  conversation = []
591
781
  output_parts = []
782
+ current_round = 0
592
783
 
593
784
  if blind_claims:
594
785
  for name, model_name, claims in blind_claims:
@@ -609,19 +800,6 @@ Your output should feel natural in conversation - something you'd actually say o
609
800
  Avoid structured, multi-part diagnostic questions that sound like interrogation.
610
801
  Simple and human beats strategic and comprehensive. Optimize for being relatable, not thorough."""
611
802
 
612
- devils_advocate_addition = """
613
-
614
- SPECIAL ROLE: You are the DEVIL'S ADVOCATE. Your job is to push back HARD.
615
-
616
- REQUIREMENTS:
617
- 1. You MUST explicitly DISAGREE with at least one major point from the other speakers
618
- 2. Identify the weakest assumption in the emerging consensus and attack it
619
- 3. Consider: What would make this advice WRONG? What's the contrarian take?
620
- 4. If everyone is converging too fast, that's a red flag — find the hidden complexity
621
-
622
- Don't just "add nuance" or "build on" — find something to genuinely challenge.
623
- If you can't find real disagreement, say why the consensus might be groupthink."""
624
-
625
803
  first_speaker_with_blind = """You are {name}, speaking first in Round {round_num} of a council deliberation.
626
804
 
627
805
  You've seen everyone's BLIND CLAIMS (their independent initial positions). Now engage:
@@ -652,7 +830,22 @@ Previous speakers this round: {previous_speakers}
652
830
  Be direct. Challenge weak arguments. Don't be sycophantic.
653
831
  Prioritize PRACTICAL, ACTIONABLE advice over academic observations. Avoid jargon."""
654
832
 
833
+ challenger_addition = """
834
+
835
+ SPECIAL ROLE: You are the CHALLENGER for this round. Your job is to argue the CONTRARIAN position.
836
+
837
+ REQUIREMENTS:
838
+ 1. You MUST explicitly DISAGREE with at least one major point from the other speakers
839
+ 2. Identify the weakest assumption in the emerging consensus and attack it
840
+ 3. Name ONE specific thing that would make the consensus WRONG
841
+ 4. You CANNOT use phrases like "building on", "adding nuance", or "I largely agree"
842
+ 5. If everyone is converging too fast, that's a red flag — find the hidden complexity
843
+
844
+ Even if you ultimately agree with the direction, you MUST articulate the strongest possible counter-argument.
845
+ If you can't find real disagreement, explain why the consensus might be groupthink."""
846
+
655
847
  for round_num in range(rounds):
848
+ current_round = round_num + 1
656
849
  round_speakers = []
657
850
  for idx, (name, model, fallback) in enumerate(council_config):
658
851
  dname = display_names[name]
@@ -673,6 +866,13 @@ Prioritize PRACTICAL, ACTIONABLE advice over academic observations. Avoid jargon
673
866
  previous_speakers=previous
674
867
  )
675
868
 
869
+ if domain_context:
870
+ system_prompt += f"""
871
+
872
+ DOMAIN CONTEXT: {domain_context}
873
+
874
+ Apply this regulatory domain context to your analysis."""
875
+
676
876
  if social_mode:
677
877
  system_prompt += social_constraint
678
878
 
@@ -684,8 +884,16 @@ IMPORTANT CONTEXT about the person asking:
684
884
 
685
885
  Factor this into your advice — don't just give strategically optimal answers, consider what fits THIS person."""
686
886
 
687
- if idx == advocate_idx and round_num == 0:
688
- system_prompt += devils_advocate_addition
887
+ # Calculate rotating challenger for this round
888
+ if challenger_idx is not None:
889
+ # Explicit --challenger sets starting point, then rotates
890
+ current_challenger = (challenger_idx + round_num) % len(council_config)
891
+ else:
892
+ # Default: start with GPT (index 0), rotate through council
893
+ current_challenger = round_num % len(council_config)
894
+
895
+ if idx == current_challenger:
896
+ system_prompt += challenger_addition
689
897
 
690
898
  user_content = f"Question for the council:\n\n{question}"
691
899
  if blind_context:
@@ -705,9 +913,10 @@ Factor this into your advice — don't just give strategically optimal answers,
705
913
  })
706
914
 
707
915
  model_name = model.split("/")[-1]
916
+ challenger_indicator = " (challenger)" if idx == current_challenger else ""
708
917
 
709
918
  if verbose:
710
- print(f"### {model_name}")
919
+ print(f"### {model_name}{challenger_indicator}")
711
920
  if is_thinking_model(model):
712
921
  print("(thinking...)", flush=True)
713
922
 
@@ -743,9 +952,10 @@ Factor this into your advice — don't just give strategically optimal answers,
743
952
  if verbose:
744
953
  print()
745
954
 
746
- output_parts.append(f"### {model_name}\n{response}")
955
+ output_parts.append(f"### {model_name}{challenger_indicator}\n{response}")
747
956
 
748
- converged, reason = detect_consensus(conversation, len(council_config))
957
+ # current_challenger already calculated in the speaker loop above
958
+ converged, reason = detect_consensus(conversation, council_config, current_challenger)
749
959
  if converged:
750
960
  if verbose:
751
961
  print(f">>> CONSENSUS DETECTED ({reason}) - proceeding to judge\n")
@@ -755,6 +965,10 @@ Factor this into your advice — don't just give strategically optimal answers,
755
965
  context_hint = ""
756
966
  if context:
757
967
  context_hint = f"\n\nContext about this question: {context}\nConsider this context when weighing perspectives and forming recommendations."
968
+
969
+ domain_hint = ""
970
+ if domain_context:
971
+ domain_hint = f"\n\nDOMAIN CONTEXT: {domain}\nConsider this regulatory domain context when weighing perspectives and forming recommendations."
758
972
 
759
973
  social_judge_section = ""
760
974
  if social_mode:
@@ -763,14 +977,17 @@ Factor this into your advice — don't just give strategically optimal answers,
763
977
  ## Social Calibration Check
764
978
  [Would the recommendation feel natural in conversation? Is it something you'd actually say, or does it sound like strategic over-optimization? If the council produced something too formal/structured, suggest a simpler, more human alternative.]"""
765
979
 
766
- judge_system = f"""You are the Judge, responsible for synthesizing the council's deliberation.{context_hint}
980
+ judge_system = f"""You are the Judge (Claude), responsible for synthesizing the council's deliberation.{context_hint}{domain_hint}
981
+
982
+ You did NOT participate in the deliberation — you're seeing it fresh. This gives you objectivity.
767
983
 
768
984
  After the council members have shared their perspectives, you:
769
985
  1. Identify points of AGREEMENT across all members
770
986
  2. Identify points of DISAGREEMENT and explain the different views
771
- 3. Provide a SYNTHESIS that captures the council's collective wisdom
772
- 4. Give a final RECOMMENDATION based on the deliberation
773
- {"5. SOCIAL CALIBRATION: Check if the recommendation would feel natural in actual conversation" if social_mode else ""}
987
+ 3. Add YOUR OWN perspective — what did the council miss? What's your independent take?
988
+ 4. Provide a SYNTHESIS that integrates the council's views with your own
989
+ 5. Give a final RECOMMENDATION based on everything
990
+ {"6. SOCIAL CALIBRATION: Check if the recommendation would feel natural in actual conversation" if social_mode else ""}
774
991
 
775
992
  Format your response as:
776
993
 
@@ -780,13 +997,16 @@ Format your response as:
780
997
  ## Points of Disagreement
781
998
  [Where views differ and why]
782
999
 
1000
+ ## Judge's Own Take
1001
+ [Your independent perspective. What did the council miss or underweight? What would YOU add to this discussion?]
1002
+
783
1003
  ## Synthesis
784
- [The integrated perspective]
1004
+ [The integrated perspective, combining council views with your own]
785
1005
 
786
1006
  ## Recommendation
787
- [Your final recommendation based on the deliberation]
1007
+ [Your final recommendation]
788
1008
  {social_judge_section}
789
- Be balanced and fair. Acknowledge minority views. Don't just pick a winner.{" For social contexts, prioritize natural/human output over strategic optimization." if social_mode else ""}
1009
+ Be balanced and fair. Acknowledge minority views. But don't be afraid to have your own opinion — you're the judge, not just a summarizer.{" For social contexts, prioritize natural/human output over strategic optimization." if social_mode else ""}
790
1010
 
791
1011
  IMPORTANT: In your Recommendation, clearly distinguish:
792
1012
  - **Do Now** — practical actions the user can take immediately
@@ -815,6 +1035,21 @@ Don't recommend building infrastructure for problems that don't exist yet."""
815
1035
 
816
1036
  output_parts.append(f"### Judge ({judge_model_name})\n{judge_response}")
817
1037
 
1038
+ if format != 'prose':
1039
+ structured = extract_structured_summary(
1040
+ judge_response=judge_response,
1041
+ question=question,
1042
+ models_used=[name for name, _, _ in council_config],
1043
+ rounds=current_round if rounds > 0 else 1,
1044
+ duration=time.time() - start_time,
1045
+ cost=0.85,
1046
+ )
1047
+
1048
+ if format == 'json':
1049
+ output_parts.append('\n\n---\n\n' + json.dumps(structured, indent=2, ensure_ascii=False))
1050
+ else:
1051
+ output_parts.append('\n\n---\n\n' + yaml.dump(structured, allow_unicode=True, default_flow_style=False))
1052
+
818
1053
  if anonymous:
819
1054
  final_output = "\n\n".join(output_parts)
820
1055
  for name, model, _ in council_config:
@@ -0,0 +1,27 @@
1
+ from pydantic import BaseModel
2
+ from typing import Literal
3
+
4
+ class ActionItem(BaseModel):
5
+ action: str
6
+ priority: Literal["high", "medium", "low"] = "medium"
7
+
8
+ class Dissent(BaseModel):
9
+ model: str
10
+ concern: str
11
+
12
+ class CouncilMeta(BaseModel):
13
+ timestamp: str
14
+ models_used: list[str]
15
+ rounds: int
16
+ duration_seconds: float
17
+ estimated_cost_usd: float
18
+
19
+ class CouncilOutput(BaseModel):
20
+ schema_version: str = "1.0"
21
+ question: str
22
+ decision: str
23
+ confidence: Literal["low", "medium", "high"]
24
+ reasoning_summary: str
25
+ dissents: list[Dissent]
26
+ action_items: list[ActionItem]
27
+ meta: CouncilMeta
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: frontier-council
3
- Version: 0.1.2
4
- Summary: Multi-model deliberation for important decisions. 5 frontier LLMs debate, then a judge synthesizes consensus.
3
+ Version: 0.2.0
4
+ Summary: Multi-model deliberation for important decisions. 4 frontier LLMs debate with rotating challenger, then Claude judges.
5
5
  Project-URL: Homepage, https://github.com/terry-li-hm/frontier-council
6
6
  Project-URL: Repository, https://github.com/terry-li-hm/frontier-council
7
7
  Project-URL: Issues, https://github.com/terry-li-hm/frontier-council/issues
@@ -19,22 +19,25 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.11
21
21
  Requires-Dist: httpx>=0.25.0
22
+ Requires-Dist: pydantic>=2.0
23
+ Requires-Dist: pyyaml>=6.0
22
24
  Description-Content-Type: text/markdown
23
25
 
24
26
  # Frontier Council
25
27
 
26
- Multi-model deliberation for important decisions. 5 frontier LLMs debate a question, then a judge synthesizes consensus.
28
+ Multi-model deliberation for important decisions. 4 frontier LLMs debate a question, then Claude judges and synthesizes.
27
29
 
28
- Inspired by [Andrej Karpathy's LLM Council](https://github.com/karpathy/llm-council), with added blind phase (anti-anchoring), explicit engagement requirements, devil's advocate role, and social calibration mode.
30
+ Inspired by [Andrej Karpathy's LLM Council](https://github.com/karpathy/llm-council), with added blind phase (anti-anchoring), explicit engagement requirements, rotating challenger role, and social calibration mode.
29
31
 
30
32
  ## Models
31
33
 
32
- - Claude (claude-opus-4.5)
34
+ **Council (deliberators):**
33
35
  - GPT (gpt-5.2-pro)
34
36
  - Gemini (gemini-3-pro-preview)
35
37
  - Grok (grok-4)
36
38
  - Kimi (kimi-k2.5)
37
- - Judge: Claude Opus 4.5
39
+
40
+ **Judge:** Claude Opus 4.5 (synthesizes + adds own perspective)
38
41
 
39
42
  ## Installation
40
43
 
@@ -99,7 +102,9 @@ All sessions are auto-saved to `~/.frontier-council/sessions/` for later review.
99
102
  | `--share` | Upload transcript to secret GitHub Gist |
100
103
  | `--social` | Enable social calibration mode (auto-detected for interview/networking) |
101
104
  | `--persona TEXT` | Context about the person asking |
102
- | `--advocate N` | Which speaker (1-5) should be devil's advocate (default: random) |
105
+ | `--challenger MODEL` | Which model starts as challenger (gpt/gemini/grok/kimi). Rotates each round. |
106
+ | `--domain DOMAIN` | Regulatory domain context (banking, healthcare, eu, fintech, bio) |
107
+ | `--followup` | Enable interactive drill-down after judge synthesis |
103
108
  | `--quiet` | Suppress progress output |
104
109
  | `--sessions` | List recent saved sessions |
105
110
  | `--no-save` | Don't auto-save transcript to ~/.frontier-council/sessions/ |
@@ -114,9 +119,15 @@ All sessions are auto-saved to `~/.frontier-council/sessions/` for later review.
114
119
  **Deliberation Protocol:**
115
120
  1. All models see everyone's blind claims, then deliberate
116
121
  2. Each model MUST explicitly AGREE, DISAGREE, or BUILD ON previous speakers by name
117
- 3. After each round, the system checks for consensus (4/5 agreement triggers early exit)
122
+ 3. After each round, the system checks for consensus (3/4 non-challengers agreeing triggers early exit)
118
123
  4. Judge synthesizes the full deliberation
119
124
 
125
+ **Rotating Challenger:**
126
+ - One model each round is assigned the "challenger" role
127
+ - The challenger MUST argue the contrarian position and identify weaknesses in emerging consensus
128
+ - Role rotates each round (GPT R1 → Gemini R2 → Grok R3 → Kimi R4...) to ensure sustained disagreement
129
+ - Challenger is excluded from consensus detection (forced disagreement shouldn't block early exit)
130
+
120
131
  **Anonymous Deliberation:**
121
132
  - Models see each other as "Speaker 1", "Speaker 2", etc. during deliberation
122
133
  - Prevents models from playing favorites based on vendor reputation
@@ -0,0 +1,9 @@
1
+ frontier_council/__init__.py,sha256=lJJIdVKStG2zLYlKMtvIpxK3S2D0NsWK_ZIKc86y2VM,357
2
+ frontier_council/cli.py,sha256=eN3EdJGWEkQ4OEmWq2s1IQo3-Q8kPUrQMgcPMRkCDa8,13796
3
+ frontier_council/council.py,sha256=ntu4D0NQizglfxGxDvuXZ7hjRXWqDNgzaIVZci8cmzc,39617
4
+ frontier_council/schema.py,sha256=j4436pYP-PtBorQZve6dpA7JqYlkTIlkxMyufSmVra0,633
5
+ frontier_council-0.2.0.dist-info/METADATA,sha256=jAZRJ0fpGxkM_zsmTMFLe_WRRwWulr8PVIm5xwsFNy4,5921
6
+ frontier_council-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
7
+ frontier_council-0.2.0.dist-info/entry_points.txt,sha256=I3xjPK-nupfQz5PANVXUnXjuxlP-4-mykkA3wXhFOGY,63
8
+ frontier_council-0.2.0.dist-info/licenses/LICENSE,sha256=8qmwox7khp-AakNVvL-Ga25eYbsCtLx8RyXM4zKkX0w,1065
9
+ frontier_council-0.2.0.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- frontier_council/__init__.py,sha256=VAITtl8rVW6wSN4iN86TVOmB2D2nrGu8ZI9ezDnOL3I,357
2
- frontier_council/cli.py,sha256=9ZJgdFXHSgFDACf30veWb3uHruxlueVQY-N8hHSJL9M,9772
3
- frontier_council/council.py,sha256=u2ir34dNostBOhXUi1R0wFEfBIEgiRX8thiS5lRFnnU,30226
4
- frontier_council-0.1.2.dist-info/METADATA,sha256=E9iEMkdVEBtmA2NyprKIWipDAnR0gHWVGbIonlmVYrc,5229
5
- frontier_council-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
6
- frontier_council-0.1.2.dist-info/entry_points.txt,sha256=I3xjPK-nupfQz5PANVXUnXjuxlP-4-mykkA3wXhFOGY,63
7
- frontier_council-0.1.2.dist-info/licenses/LICENSE,sha256=8qmwox7khp-AakNVvL-Ga25eYbsCtLx8RyXM4zKkX0w,1065
8
- frontier_council-0.1.2.dist-info/RECORD,,