PyPI - frontier-council - Versions diffs - 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

frontier-council 0.1.2py3-none-any.whl → 0.2.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

frontier_council/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """Frontier Council - Multi-model deliberation for important decisions."""
-__version__ = "0.1.2"
+__version__ = "0.1.3"
 from .council import (
     run_council,

frontier_council/cli.py CHANGED Viewed

@@ -29,6 +29,8 @@ from .council import (
     COUNCIL,
     detect_social_context,
     run_council,
+    DOMAIN_CONTEXTS,
+    run_followup_discussion,
 )
@@ -42,9 +44,10 @@ Examples:
   frontier-council "What questions should I ask?" --social
   frontier-council "Career decision" --persona "builder who hates process work"
   frontier-council "Architecture choice" --rounds 3 --output transcript.md
+  frontier-council "Decision" --domain banking --followup --output counsel.md
         """,
     )
-    parser.add_argument("question", help="The question for the council to deliberate")
+    parser.add_argument("question", nargs="?", help="The question for the council to deliberate")
     parser.add_argument(
         "--rounds",
         type=int,
@@ -74,6 +77,12 @@ Examples:
         "--context", "-c",
         help="Context hint for the judge (e.g., 'architecture decision', 'ethics question')",
     )
+    parser.add_argument(
+        "--format", "-f",
+        choices=["json", "yaml", "prose"],
+        default="prose",
+        help="Output format: json (machine-parseable), yaml (structured), prose (default)",
+    )
     parser.add_argument(
         "--share",
         action="store_true",
@@ -92,7 +101,20 @@ Examples:
         "--advocate",
         type=int,
         choices=[1, 2, 3, 4, 5],
-        help="Which speaker (1-5) should be devil's advocate (default: random)",
+        help="DEPRECATED: Use --challenger instead. Maps to --challenger by model name.",
+    )
+    parser.add_argument(
+        "--domain",
+        help="Regulatory domain context (banking, healthcare, eu, fintech, bio)",
+    )
+    parser.add_argument(
+        "--challenger",
+        help="Which model should argue contrarian (claude, gpt, gemini, grok, kimi). Default: claude",
+    )
+    parser.add_argument(
+        "--followup",
+        action="store_true",
+        help="Enable followup mode to drill into specific points after judge synthesis",
     )
     parser.add_argument(
         "--no-save",
@@ -121,12 +143,44 @@ Examples:
                 print(f"\n  ... and {len(sessions) - 20} more")
         sys.exit(0)
+    # Require question for normal operation
+    if not args.question:
+        parser.error("the following arguments are required: question")
     # Auto-detect social context if not explicitly set
     social_mode = args.social or detect_social_context(args.question)
     if social_mode and not args.social and not args.quiet:
         print("(Auto-detected social context - enabling social calibration mode)")
         print()
+    # Validate and resolve domain
+    domain_context = None
+    if args.domain:
+        if args.domain.lower() not in DOMAIN_CONTEXTS:
+            print(f"Error: Unknown domain '{args.domain}'. Valid domains: {', '.join(DOMAIN_CONTEXTS.keys())}", file=sys.stderr)
+            sys.exit(1)
+        domain_context = args.domain.lower()
+    # Resolve challenger model
+    challenger_idx = None
+    if args.challenger:
+        challenger_lower = args.challenger.lower()
+        model_name_map = {n.lower(): i for i, (n, _, _) in enumerate(COUNCIL)}
+        if challenger_lower not in model_name_map:
+            print(f"Error: Unknown model '{args.challenger}'. Valid models: {', '.join(n for n, _, _ in COUNCIL)}", file=sys.stderr)
+            sys.exit(1)
+        challenger_idx = model_name_map[challenger_lower]
+    elif args.domain:
+        # Default challenger: GPT (index 0) when domain is set
+        # Reasoning: Grok is naturally contrarian anyway, so assigning GPT as challenger
+        # gives you two sources of pushback
+        challenger_idx = 0
+    if not args.quiet and challenger_idx is not None:
+        challenger_name = COUNCIL[challenger_idx][0]
+        print(f"(Contrainian challenger: {challenger_name})")
+        print()
     # Get API keys
     api_key = os.environ.get("OPENROUTER_API_KEY")
     if not api_key:
@@ -155,14 +209,28 @@ Examples:
         print()
     try:
-        advocate_idx = (args.advocate - 1) if args.advocate else random.randint(0, len(COUNCIL) - 1)
+        # Handle deprecated --advocate flag
+        if args.advocate:
+            print("Warning: --advocate is deprecated. Use --challenger instead.", file=sys.stderr)
+            model_names = [n for n, _, _ in COUNCIL]
+            mapped_model = model_names[args.advocate - 1].lower()
+            print(f"  Mapping --advocate {args.advocate} to --challenger {mapped_model}", file=sys.stderr)
+            if not args.challenger:
+                args.challenger = mapped_model
+            # Re-resolve challenger_idx after mapping
+            challenger_lower = args.challenger.lower()
+            model_name_map = {n.lower(): i for i, (n, _, _) in enumerate(COUNCIL)}
+            challenger_idx = model_name_map.get(challenger_lower, 0)
         if not args.quiet and args.persona:
             print(f"(Persona context: {args.persona})")
             print()
+        # Show starting challenger (now rotates each round)
         if not args.quiet:
-            advocate_name = COUNCIL[advocate_idx][0]
-            print(f"(Devil's advocate: {advocate_name})")
+            starting_challenger_idx = challenger_idx if challenger_idx is not None else 0
+            starting_challenger_name = COUNCIL[starting_challenger_idx][0]
+            print(f"(Starting challenger: {starting_challenger_name}, rotates each round)")
             print()
         transcript, failed_models = run_council(
@@ -178,9 +246,32 @@ Examples:
             context=args.context,
             social_mode=social_mode,
             persona=args.persona,
-            advocate_idx=advocate_idx,
+            domain=domain_context,
+            challenger_idx=challenger_idx,
+            format=args.format,
         )
+        # Followup mode
+        followup_transcript = ""
+        if args.followup and not args.quiet:
+            print("\n" + "=" * 60)
+            print("Enter topic to explore further (or 'done'): ", end="", flush=True)
+            topic = input().strip()
+            if topic and topic.lower() != "done":
+                domain_ctxt = DOMAIN_CONTEXTS.get(domain_context, "") if domain_context else ""
+                followup_transcript = run_followup_discussion(
+                    question=args.question,
+                    topic=topic,
+                    council_config=COUNCIL,
+                    api_key=api_key,
+                    domain_context=domain_ctxt,
+                    social_mode=social_mode,
+                    persona=args.persona,
+                    verbose=not args.quiet,
+                )
+                transcript += "\n\n" + followup_transcript
         # Print failure summary
         if failed_models and not args.quiet:
             print()

frontier_council/council.py CHANGED Viewed

@@ -4,6 +4,9 @@ import asyncio
 import httpx
 import json
 import re
+import time
+import yaml
+from datetime import datetime
 from pathlib import Path
 OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
@@ -14,15 +17,24 @@ MOONSHOT_URL = "https://api.moonshot.cn/v1/chat/completions"
 # Format: (name, openrouter_model, fallback) - fallback is (provider, model) or None
 # Providers: "google" = AI Studio, "moonshot" = Moonshot API
 COUNCIL = [
-    ("Claude", "anthropic/claude-opus-4.5", None),
     ("GPT", "openai/gpt-5.2-pro", None),
     ("Gemini", "google/gemini-3-pro-preview", ("google", "gemini-2.5-pro")),
     ("Grok", "x-ai/grok-4", None),
     ("Kimi", "moonshotai/kimi-k2.5", ("moonshot", "kimi-k2.5")),
 ]
+# Claude is judge-only (not in council) to avoid conflict of interest
 JUDGE_MODEL = "anthropic/claude-opus-4.5"
+# Domain-specific regulatory contexts
+DOMAIN_CONTEXTS = {
+    "banking": "You are operating in a banking/financial services regulatory environment. Consider: HKMA/MAS/FCA requirements, Model Risk Management (MRM) expectations, audit trail needs, BCBS 239 governance, explainability requirements, documentation standards, and regulatory scrutiny levels.",
+    "healthcare": "You are operating in a healthcare regulatory environment. Consider: HIPAA constraints on PHI handling, FDA requirements for medical devices, clinical validation expectations, interoperability standards (FHIR), GxP compliance, and patient safety requirements.",
+    "eu": "You are operating in the EU regulatory environment. Consider: GDPR data protection requirements, EU AI Act risk categorization, Digital Markets Act compliance, cross-border data transfer rules (Schrems II), and EU data localization expectations.",
+    "fintech": "You are operating in a fintech regulatory environment. Consider: KYC/AML requirements, PSD2 banking regulations, e-money licensing expectations, payment services directive compliance, and financial consumer protection rules.",
+    "bio": "You are operating in a biotech/pharma regulatory environment. Consider: FDA/EMA drug approval processes, GMP manufacturing requirements, clinical trial design expectations, pharmacovigilance obligations, and post-market surveillance requirements.",
+}
 # Keywords that suggest social/conversational context (auto-detect)
 SOCIAL_KEYWORDS = [
     "interview", "ask him", "ask her", "ask them", "question to ask",
@@ -31,9 +43,12 @@ SOCIAL_KEYWORDS = [
     "what should i say", "how should i respond", "conversation",
 ]
-# Thinking models don't stream well - use non-streaming for these
+# Thinking models - use non-streaming, higher tokens, longer timeout
 THINKING_MODEL_SUFFIXES = {
+    "claude-opus-4.5",
+    "gpt-5.2-pro",
     "gemini-3-pro-preview",
+    "grok-4",
     "kimi-k2.5",
     "deepseek-r1",
     "o1-preview", "o1-mini", "o1",
@@ -440,6 +455,7 @@ async def run_blind_phase_parallel(
     moonshot_api_key: str | None = None,
     verbose: bool = True,
     persona: str | None = None,
+    domain_context: str = "",
 ) -> list[tuple[str, str, str]]:
     """Parallel blind first-pass: all models stake claims simultaneously."""
     blind_system = """You are participating in the BLIND PHASE of a council deliberation.
@@ -454,6 +470,13 @@ Provide a CLAIM SKETCH (not a full response):
 Keep it concise (~100 words). The full deliberation comes later."""
+    if domain_context:
+        blind_system += f"""
+DOMAIN CONTEXT: {domain_context}
+Apply this regulatory domain context to your analysis."""
     if persona:
         blind_system += f"""
@@ -519,28 +542,188 @@ def sanitize_speaker_content(content: str) -> str:
     return sanitized
-def detect_consensus(conversation: list[tuple[str, str]], council_size: int) -> tuple[bool, str]:
-    """Detect if council has converged. Returns (converged, reason)."""
+def detect_consensus(
+    conversation: list[tuple[str, str]],
+    council_config: list[tuple[str, str, tuple[str, str] | None]],
+    current_challenger_idx: int | None = None,
+) -> tuple[bool, str]:
+    """Detect if council has converged. Returns (converged, reason).
+    Excludes the current challenger from consensus count since they're
+    structurally incentivized to disagree.
+    """
+    council_size = len(council_config)
     if len(conversation) < council_size:
         return False, "insufficient responses"
-    recent = [text for _, text in conversation[-council_size:]]
+    recent = conversation[-council_size:]
-    consensus_count = sum(1 for text in recent if "CONSENSUS:" in text.upper())
-    if consensus_count >= council_size - 1:
+    # Exclude challenger from consensus count
+    if current_challenger_idx is not None:
+        challenger_name = council_config[current_challenger_idx][0]
+        recent = [(name, text) for name, text in recent if name != challenger_name]
+    effective_size = len(recent)
+    if effective_size == 0:
+        return False, "no non-challenger responses"
+    threshold = effective_size - 1  # Need all-but-one non-challengers to agree
+    consensus_count = sum(1 for _, text in recent if "CONSENSUS:" in text.upper())
+    if consensus_count >= threshold:
         return True, "explicit consensus signals"
     agreement_phrases = ["i agree with", "i concur", "we all agree", "consensus emerging"]
     agreement_count = sum(
-        1 for text in recent
+        1 for _, text in recent
         if any(phrase in text.lower() for phrase in agreement_phrases)
     )
-    if agreement_count >= council_size - 1:
+    if agreement_count >= threshold:
         return True, "agreement language detected"
     return False, "no consensus"
+def extract_structured_summary(
+    judge_response: str,
+    question: str,
+    models_used: list[str],
+    rounds: int,
+    duration: float,
+    cost: float,
+) -> dict:
+    lines = judge_response.split('\n')
+    decision = ""
+    confidence = "medium"
+    reasoning = ""
+    dissents = []
+    action_items = []
+    for i, line in enumerate(lines):
+        line_lower = line.lower()
+        if 'recommend' in line_lower or 'decision:' in line_lower:
+            decision = line.strip()
+        elif 'dissent' in line_lower or 'disagree' in line_lower:
+            dissents.append({"model": "Unknown", "concern": line.strip()})
+        elif 'action' in line_lower or 'next step' in line_lower:
+            action_items.append({"action": line.strip(), "priority": "medium"})
+    if not decision:
+        for line in lines:
+            if len(line.strip()) > 20:
+                decision = line.strip()
+                break
+    return {
+        "schema_version": "1.0",
+        "question": question,
+        "decision": decision[:500] if decision else "See transcript for details",
+        "confidence": confidence,
+        "reasoning_summary": judge_response[:1000],
+        "dissents": dissents[:5],
+        "action_items": action_items[:5],
+        "meta": {
+            "timestamp": datetime.now().isoformat(),
+            "models_used": models_used,
+            "rounds": rounds,
+            "duration_seconds": duration,
+            "estimated_cost_usd": cost
+        }
+    }
+def run_followup_discussion(
+    question: str,
+    topic: str,
+    council_config: list[tuple[str, str, tuple[str, str] | None]],
+    api_key: str,
+    domain_context: str = "",
+    social_mode: bool = False,
+    persona: str | None = None,
+    verbose: bool = True,
+) -> str:
+    """Run a focused followup discussion on a specific topic with 2 models. Returns the followup transcript."""
+    # Use the first two council members for the followup
+    followup_models = council_config[:2]  # GPT and Gemini with the default COUNCIL
+    followup_transcript_parts = []
+    if verbose:
+        print()
+        print("=" * 60)
+        print(f"FOLLOWUP: {topic}")
+        print("=" * 60)
+        print()
+    social_constraint = """
+SOCIAL CALIBRATION: This is a social/conversational context (interview, networking, outreach).
+Your output should feel natural in conversation - something you'd actually say over coffee.
+Avoid structured, multi-part diagnostic questions that sound like interrogation.
+Simple and human beats strategic and comprehensive. Optimize for being relatable, not thorough.""" if social_mode else ""
+    followup_parts = [
+        "You are participating in a FOCUSED FOLLOWUP discussion on a specific topic.",
+        "",
+        f"The main council has concluded, and we're now drilling down into:",
+        f"TOPIC: {topic}",
+        "",
+        "Keep your response focused on this specific topic. Don't rehash the full council deliberation.",
+        "Be concise and practical.",
+        "",
+    ]
+    if social_constraint:
+        followup_parts.append(social_constraint.strip())
+    if persona:
+        followup_parts.extend([
+            "",
+            "IMPORTANT CONTEXT about the person asking:",
+            persona,
+            "",
+            "Factor this into your advice — don't just give strategically optimal answers, consider what fits THIS person.",
+        ])
+    if domain_context:
+        followup_parts.extend([
+            "",
+            f"DOMAIN CONTEXT: {domain_context}",
+            "",
+            "Apply this regulatory domain context to your analysis.",
+        ])
+    followup_system = "\n".join(followup_parts)
+    followup_transcript_parts.append(f"### Followup Discussion: {topic}\n")
+    for i, (name, model, fallback) in enumerate(followup_models):
+        messages = [
+            {"role": "system", "content": followup_system},
+            {"role": "user", "content": f"Original Question:\n\n{question}\n\nFocus your response on: {topic}"},
+        ]
+        if verbose:
+            print(f"### {name}")
+        response = query_model(api_key, model, messages, stream=verbose)
+        if verbose:
+            print()
+        followup_transcript_parts.append(f"### {name}\n{response}\n")
+    if verbose:
+        print("=" * 60)
+        print("FOLLOWUP COMPLETE")
+        print("=" * 60)
+        print()
+    return "\n\n".join(followup_transcript_parts)
 def run_council(
     question: str,
     council_config: list[tuple[str, str, tuple[str, str] | None]],
@@ -554,17 +737,22 @@ def run_council(
     context: str | None = None,
     social_mode: bool = False,
     persona: str | None = None,
-    advocate_idx: int | None = None,
+    domain: str | None = None,
+    challenger_idx: int | None = None,  # Starting challenger index, rotates each round
+    format: str = "prose",
 ) -> tuple[str, list[str]]:
     """Run the council deliberation. Returns (transcript, failed_models)."""
+    start_time = time.time()
+    domain_context = DOMAIN_CONTEXTS.get(domain, "") if domain else ""
     council_names = [name for name, _, _ in council_config]
     blind_claims = []
     failed_models = []
     if blind:
         blind_claims = asyncio.run(run_blind_phase_parallel(
-            question, council_config, api_key, google_api_key, moonshot_api_key, verbose, persona
+            question, council_config, api_key, google_api_key, moonshot_api_key, verbose, persona, domain_context
         ))
         for name, model_name, claims in blind_claims:
             if claims.startswith("["):
@@ -580,6 +768,8 @@ def run_council(
         if anonymous:
             print("(Models see each other as Speaker 1, 2, etc. to prevent bias)")
         print(f"Rounds: {rounds}")
+        if domain:
+            print(f"Domain context: {domain}")
         print(f"Question: {question[:100]}{'...' if len(question) > 100 else ''}")
         print()
         print("=" * 60)
@@ -589,6 +779,7 @@ def run_council(
     conversation = []
     output_parts = []
+    current_round = 0
     if blind_claims:
         for name, model_name, claims in blind_claims:
@@ -609,19 +800,6 @@ Your output should feel natural in conversation - something you'd actually say o
 Avoid structured, multi-part diagnostic questions that sound like interrogation.
 Simple and human beats strategic and comprehensive. Optimize for being relatable, not thorough."""
-    devils_advocate_addition = """
-SPECIAL ROLE: You are the DEVIL'S ADVOCATE. Your job is to push back HARD.
-REQUIREMENTS:
-1. You MUST explicitly DISAGREE with at least one major point from the other speakers
-2. Identify the weakest assumption in the emerging consensus and attack it
-3. Consider: What would make this advice WRONG? What's the contrarian take?
-4. If everyone is converging too fast, that's a red flag — find the hidden complexity
-Don't just "add nuance" or "build on" — find something to genuinely challenge.
-If you can't find real disagreement, say why the consensus might be groupthink."""
     first_speaker_with_blind = """You are {name}, speaking first in Round {round_num} of a council deliberation.
 You've seen everyone's BLIND CLAIMS (their independent initial positions). Now engage:
@@ -652,7 +830,22 @@ Previous speakers this round: {previous_speakers}
 Be direct. Challenge weak arguments. Don't be sycophantic.
 Prioritize PRACTICAL, ACTIONABLE advice over academic observations. Avoid jargon."""
+    challenger_addition = """
+SPECIAL ROLE: You are the CHALLENGER for this round. Your job is to argue the CONTRARIAN position.
+REQUIREMENTS:
+1. You MUST explicitly DISAGREE with at least one major point from the other speakers
+2. Identify the weakest assumption in the emerging consensus and attack it
+3. Name ONE specific thing that would make the consensus WRONG
+4. You CANNOT use phrases like "building on", "adding nuance", or "I largely agree"
+5. If everyone is converging too fast, that's a red flag — find the hidden complexity
+Even if you ultimately agree with the direction, you MUST articulate the strongest possible counter-argument.
+If you can't find real disagreement, explain why the consensus might be groupthink."""
     for round_num in range(rounds):
+        current_round = round_num + 1
         round_speakers = []
         for idx, (name, model, fallback) in enumerate(council_config):
             dname = display_names[name]
@@ -673,6 +866,13 @@ Prioritize PRACTICAL, ACTIONABLE advice over academic observations. Avoid jargon
                     previous_speakers=previous
                 )
+            if domain_context:
+                system_prompt += f"""
+DOMAIN CONTEXT: {domain_context}
+Apply this regulatory domain context to your analysis."""
             if social_mode:
                 system_prompt += social_constraint
@@ -684,8 +884,16 @@ IMPORTANT CONTEXT about the person asking:
 Factor this into your advice — don't just give strategically optimal answers, consider what fits THIS person."""
-            if idx == advocate_idx and round_num == 0:
-                system_prompt += devils_advocate_addition
+            # Calculate rotating challenger for this round
+            if challenger_idx is not None:
+                # Explicit --challenger sets starting point, then rotates
+                current_challenger = (challenger_idx + round_num) % len(council_config)
+            else:
+                # Default: start with the first council member (index 0, GPT in the default COUNCIL), rotate through council
+                current_challenger = round_num % len(council_config)
+            if idx == current_challenger:
+                system_prompt += challenger_addition
             user_content = f"Question for the council:\n\n{question}"
             if blind_context:
@@ -705,9 +913,10 @@ Factor this into your advice — don't just give strategically optimal answers,
                 })
             model_name = model.split("/")[-1]
+            challenger_indicator = " (challenger)" if idx == current_challenger else ""
             if verbose:
-                print(f"### {model_name}")
+                print(f"### {model_name}{challenger_indicator}")
                 if is_thinking_model(model):
                     print("(thinking...)", flush=True)
@@ -743,9 +952,10 @@ Factor this into your advice — don't just give strategically optimal answers,
             if verbose:
                 print()
-            output_parts.append(f"### {model_name}\n{response}")
+            output_parts.append(f"### {model_name}{challenger_indicator}\n{response}")
-        converged, reason = detect_consensus(conversation, len(council_config))
+        # current_challenger already calculated in the speaker loop above
+        converged, reason = detect_consensus(conversation, council_config, current_challenger)
         if converged:
             if verbose:
                 print(f">>> CONSENSUS DETECTED ({reason}) - proceeding to judge\n")
@@ -755,6 +965,10 @@ Factor this into your advice — don't just give strategically optimal answers,
     context_hint = ""
     if context:
         context_hint = f"\n\nContext about this question: {context}\nConsider this context when weighing perspectives and forming recommendations."
+    domain_hint = ""
+    if domain_context:
+        domain_hint = f"\n\nDOMAIN CONTEXT: {domain}\nConsider this regulatory domain context when weighing perspectives and forming recommendations."
     social_judge_section = ""
     if social_mode:
@@ -763,14 +977,17 @@ Factor this into your advice — don't just give strategically optimal answers,
 ## Social Calibration Check
 [Would the recommendation feel natural in conversation? Is it something you'd actually say, or does it sound like strategic over-optimization? If the council produced something too formal/structured, suggest a simpler, more human alternative.]"""
-    judge_system = f"""You are the Judge, responsible for synthesizing the council's deliberation.{context_hint}
+    judge_system = f"""You are the Judge (Claude), responsible for synthesizing the council's deliberation.{context_hint}{domain_hint}
+You did NOT participate in the deliberation — you're seeing it fresh. This gives you objectivity.
 After the council members have shared their perspectives, you:
 1. Identify points of AGREEMENT across all members
 2. Identify points of DISAGREEMENT and explain the different views
-3. Provide a SYNTHESIS that captures the council's collective wisdom
-4. Give a final RECOMMENDATION based on the deliberation
-{"5. SOCIAL CALIBRATION: Check if the recommendation would feel natural in actual conversation" if social_mode else ""}
+3. Add YOUR OWN perspective — what did the council miss? What's your independent take?
+4. Provide a SYNTHESIS that integrates the council's views with your own
+5. Give a final RECOMMENDATION based on everything
+{"6. SOCIAL CALIBRATION: Check if the recommendation would feel natural in actual conversation" if social_mode else ""}
 Format your response as:
@@ -780,13 +997,16 @@ Format your response as:
 ## Points of Disagreement
 [Where views differ and why]
+## Judge's Own Take
+[Your independent perspective. What did the council miss or underweight? What would YOU add to this discussion?]
 ## Synthesis
-[The integrated perspective]
+[The integrated perspective, combining council views with your own]
 ## Recommendation
-[Your final recommendation based on the deliberation]
+[Your final recommendation]
 {social_judge_section}
-Be balanced and fair. Acknowledge minority views. Don't just pick a winner.{" For social contexts, prioritize natural/human output over strategic optimization." if social_mode else ""}
+Be balanced and fair. Acknowledge minority views. But don't be afraid to have your own opinion — you're the judge, not just a summarizer.{" For social contexts, prioritize natural/human output over strategic optimization." if social_mode else ""}
 IMPORTANT: In your Recommendation, clearly distinguish:
 - **Do Now** — practical actions the user can take immediately
@@ -815,6 +1035,21 @@ Don't recommend building infrastructure for problems that don't exist yet."""
     output_parts.append(f"### Judge ({judge_model_name})\n{judge_response}")
+    if format != 'prose':
+        structured = extract_structured_summary(
+            judge_response=judge_response,
+            question=question,
+            models_used=[name for name, _, _ in council_config],
+            rounds=current_round if rounds > 0 else 1,
+            duration=time.time() - start_time,
+            cost=0.85,
+        )
+        if format == 'json':
+            output_parts.append('\n\n---\n\n' + json.dumps(structured, indent=2, ensure_ascii=False))
+        else:
+            output_parts.append('\n\n---\n\n' + yaml.dump(structured, allow_unicode=True, default_flow_style=False))
     if anonymous:
         final_output = "\n\n".join(output_parts)
         for name, model, _ in council_config:

frontier_council/schema.py ADDED Viewed

@@ -0,0 +1,27 @@
+from pydantic import BaseModel
+from typing import Literal
+class ActionItem(BaseModel):
+    action: str
+    priority: Literal["high", "medium", "low"] = "medium"
+class Dissent(BaseModel):
+    model: str
+    concern: str
+class CouncilMeta(BaseModel):
+    timestamp: str
+    models_used: list[str]
+    rounds: int
+    duration_seconds: float
+    estimated_cost_usd: float
+class CouncilOutput(BaseModel):
+    schema_version: str = "1.0"
+    question: str
+    decision: str
+    confidence: Literal["low", "medium", "high"]
+    reasoning_summary: str
+    dissents: list[Dissent]
+    action_items: list[ActionItem]
+    meta: CouncilMeta

{frontier_council-0.1.2.dist-info → frontier_council-0.2.0.dist-info}/METADATA RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: frontier-council
-Version: 0.1.2
-Summary: Multi-model deliberation for important decisions. 5 frontier LLMs debate, then a judge synthesizes consensus.
+Version: 0.2.0
+Summary: Multi-model deliberation for important decisions. 4 frontier LLMs debate with rotating challenger, then Claude judges.
 Project-URL: Homepage, https://github.com/terry-li-hm/frontier-council
 Project-URL: Repository, https://github.com/terry-li-hm/frontier-council
 Project-URL: Issues, https://github.com/terry-li-hm/frontier-council/issues
@@ -19,22 +19,25 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.11
 Requires-Dist: httpx>=0.25.0
+Requires-Dist: pydantic>=2.0
+Requires-Dist: pyyaml>=6.0
 Description-Content-Type: text/markdown
 # Frontier Council
-Multi-model deliberation for important decisions. 5 frontier LLMs debate a question, then a judge synthesizes consensus.
+Multi-model deliberation for important decisions. 4 frontier LLMs debate a question, then Claude judges and synthesizes.
-Inspired by [Andrej Karpathy's LLM Council](https://github.com/karpathy/llm-council), with added blind phase (anti-anchoring), explicit engagement requirements, devil's advocate role, and social calibration mode.
+Inspired by [Andrej Karpathy's LLM Council](https://github.com/karpathy/llm-council), with added blind phase (anti-anchoring), explicit engagement requirements, rotating challenger role, and social calibration mode.
 ## Models
-- Claude (claude-opus-4.5)
+**Council (deliberators):**
 - GPT (gpt-5.2-pro)
 - Gemini (gemini-3-pro-preview)
 - Grok (grok-4)
 - Kimi (kimi-k2.5)
-- Judge: Claude Opus 4.5
+**Judge:** Claude Opus 4.5 (synthesizes + adds own perspective)
 ## Installation
@@ -99,7 +102,9 @@ All sessions are auto-saved to `~/.frontier-council/sessions/` for later review.
 | `--share` | Upload transcript to secret GitHub Gist |
 | `--social` | Enable social calibration mode (auto-detected for interview/networking) |
 | `--persona TEXT` | Context about the person asking |
-| `--advocate N` | Which speaker (1-5) should be devil's advocate (default: random) |
+| `--challenger MODEL` | Which model starts as challenger (gpt/gemini/grok/kimi). Rotates each round. |
+| `--domain DOMAIN` | Regulatory domain context (banking, healthcare, eu, fintech, bio) |
+| `--followup` | Enable interactive drill-down after judge synthesis |
 | `--quiet` | Suppress progress output |
 | `--sessions` | List recent saved sessions |
 | `--no-save` | Don't auto-save transcript to ~/.frontier-council/sessions/ |
@@ -114,9 +119,15 @@ All sessions are auto-saved to `~/.frontier-council/sessions/` for later review.
 **Deliberation Protocol:**
 1. All models see everyone's blind claims, then deliberate
 2. Each model MUST explicitly AGREE, DISAGREE, or BUILD ON previous speakers by name
-3. After each round, the system checks for consensus (4/5 agreement triggers early exit)
+3. After each round, the system checks for consensus among the non-challenger members (all but one agreeing — 2 of 3 with the default council — triggers early exit)
 4. Judge synthesizes the full deliberation
+**Rotating Challenger:**
+- One model each round is assigned the "challenger" role
+- The challenger MUST argue the contrarian position and identify weaknesses in emerging consensus
+- Role rotates each round (GPT R1 → Gemini R2 → Grok R3 → Kimi R4...) to ensure sustained disagreement
+- Challenger is excluded from consensus detection (forced disagreement shouldn't block early exit)
 **Anonymous Deliberation:**
 - Models see each other as "Speaker 1", "Speaker 2", etc. during deliberation
 - Prevents models from playing favorites based on vendor reputation

frontier_council-0.2.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+frontier_council/__init__.py,sha256=lJJIdVKStG2zLYlKMtvIpxK3S2D0NsWK_ZIKc86y2VM,357
+frontier_council/cli.py,sha256=eN3EdJGWEkQ4OEmWq2s1IQo3-Q8kPUrQMgcPMRkCDa8,13796
+frontier_council/council.py,sha256=ntu4D0NQizglfxGxDvuXZ7hjRXWqDNgzaIVZci8cmzc,39617
+frontier_council/schema.py,sha256=j4436pYP-PtBorQZve6dpA7JqYlkTIlkxMyufSmVra0,633
+frontier_council-0.2.0.dist-info/METADATA,sha256=jAZRJ0fpGxkM_zsmTMFLe_WRRwWulr8PVIm5xwsFNy4,5921
+frontier_council-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+frontier_council-0.2.0.dist-info/entry_points.txt,sha256=I3xjPK-nupfQz5PANVXUnXjuxlP-4-mykkA3wXhFOGY,63
+frontier_council-0.2.0.dist-info/licenses/LICENSE,sha256=8qmwox7khp-AakNVvL-Ga25eYbsCtLx8RyXM4zKkX0w,1065
+frontier_council-0.2.0.dist-info/RECORD,,

frontier_council-0.1.2.dist-info/RECORD DELETED Viewed

@@ -1,8 +0,0 @@
-frontier_council/__init__.py,sha256=VAITtl8rVW6wSN4iN86TVOmB2D2nrGu8ZI9ezDnOL3I,357
-frontier_council/cli.py,sha256=9ZJgdFXHSgFDACf30veWb3uHruxlueVQY-N8hHSJL9M,9772
-frontier_council/council.py,sha256=u2ir34dNostBOhXUi1R0wFEfBIEgiRX8thiS5lRFnnU,30226
-frontier_council-0.1.2.dist-info/METADATA,sha256=E9iEMkdVEBtmA2NyprKIWipDAnR0gHWVGbIonlmVYrc,5229
-frontier_council-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-frontier_council-0.1.2.dist-info/entry_points.txt,sha256=I3xjPK-nupfQz5PANVXUnXjuxlP-4-mykkA3wXhFOGY,63
-frontier_council-0.1.2.dist-info/licenses/LICENSE,sha256=8qmwox7khp-AakNVvL-Ga25eYbsCtLx8RyXM4zKkX0w,1065
-frontier_council-0.1.2.dist-info/RECORD,,

{frontier_council-0.1.2.dist-info → frontier_council-0.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{frontier_council-0.1.2.dist-info → frontier_council-0.2.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{frontier_council-0.1.2.dist-info → frontier_council-0.2.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

frontier-council 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl

frontier-council 0.1.2py3-none-any.whl → 0.2.0py3-none-any.whl