flonat-research 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/domain-reviewer.md +336 -0
- package/.claude/agents/fixer.md +226 -0
- package/.claude/agents/paper-critic.md +370 -0
- package/.claude/agents/peer-reviewer.md +289 -0
- package/.claude/agents/proposal-reviewer.md +215 -0
- package/.claude/agents/referee2-reviewer.md +367 -0
- package/.claude/agents/references/journal-referee-profiles.md +354 -0
- package/.claude/agents/references/paper-critic/council-personas.md +77 -0
- package/.claude/agents/references/paper-critic/council-prompts.md +198 -0
- package/.claude/agents/references/peer-reviewer/report-template.md +199 -0
- package/.claude/agents/references/peer-reviewer/sa-prompts.md +260 -0
- package/.claude/agents/references/peer-reviewer/security-scan.md +188 -0
- package/.claude/agents/references/proposal-reviewer/report-template.md +144 -0
- package/.claude/agents/references/proposal-reviewer/sa-prompts.md +149 -0
- package/.claude/agents/references/referee-config.md +114 -0
- package/.claude/agents/references/referee2-reviewer/audit-checklists.md +287 -0
- package/.claude/agents/references/referee2-reviewer/report-template.md +334 -0
- package/.claude/rules/design-before-results.md +52 -0
- package/.claude/rules/ignore-agents-md.md +17 -0
- package/.claude/rules/ignore-gemini-md.md +17 -0
- package/.claude/rules/lean-claude-md.md +45 -0
- package/.claude/rules/learn-tags.md +99 -0
- package/.claude/rules/overleaf-separation.md +67 -0
- package/.claude/rules/plan-first.md +175 -0
- package/.claude/rules/read-docs-first.md +50 -0
- package/.claude/rules/scope-discipline.md +28 -0
- package/.claude/settings.json +125 -0
- package/.context/current-focus.md +33 -0
- package/.context/preferences/priorities.md +36 -0
- package/.context/preferences/task-naming.md +28 -0
- package/.context/profile.md +29 -0
- package/.context/projects/_index.md +41 -0
- package/.context/projects/papers/nudge-exp.md +22 -0
- package/.context/projects/papers/uncertainty.md +31 -0
- package/.context/resources/claude-scientific-writer-review.md +48 -0
- package/.context/resources/cunningham-multi-analyst-agents.md +104 -0
- package/.context/resources/cunningham-multilang-code-audit.md +62 -0
- package/.context/resources/google-ai-co-scientist-review.md +72 -0
- package/.context/resources/karpathy-llm-council-review.md +58 -0
- package/.context/resources/multi-coder-reliability-protocol.md +175 -0
- package/.context/resources/pedro-santanna-takeaways.md +96 -0
- package/.context/resources/venue-rankings/abs_ajg_2024.csv +1823 -0
- package/.context/resources/venue-rankings/abs_ajg_2024_econ.csv +356 -0
- package/.context/resources/venue-rankings/cabs_4_4star_theory.csv +40 -0
- package/.context/resources/venue-rankings/core_2026.csv +801 -0
- package/.context/resources/venue-rankings.md +147 -0
- package/.context/workflows/README.md +69 -0
- package/.context/workflows/daily-review.md +91 -0
- package/.context/workflows/meeting-actions.md +108 -0
- package/.context/workflows/replication-protocol.md +155 -0
- package/.context/workflows/weekly-review.md +113 -0
- package/.mcp-server-biblio/formatters.py +158 -0
- package/.mcp-server-biblio/pyproject.toml +11 -0
- package/.mcp-server-biblio/server.py +678 -0
- package/.mcp-server-biblio/sources/__init__.py +14 -0
- package/.mcp-server-biblio/sources/base.py +73 -0
- package/.mcp-server-biblio/sources/formatters.py +83 -0
- package/.mcp-server-biblio/sources/models.py +22 -0
- package/.mcp-server-biblio/sources/multi_source.py +243 -0
- package/.mcp-server-biblio/sources/openalex_source.py +183 -0
- package/.mcp-server-biblio/sources/scopus_source.py +309 -0
- package/.mcp-server-biblio/sources/wos_source.py +508 -0
- package/.mcp-server-biblio/uv.lock +896 -0
- package/.scripts/README.md +161 -0
- package/.scripts/ai_pattern_density.py +446 -0
- package/.scripts/conf +445 -0
- package/.scripts/config.py +122 -0
- package/.scripts/count_inventory.py +275 -0
- package/.scripts/daily_digest.py +288 -0
- package/.scripts/done +177 -0
- package/.scripts/extract_meeting_actions.py +223 -0
- package/.scripts/focus +176 -0
- package/.scripts/generate-codex-agents-md.py +217 -0
- package/.scripts/inbox +194 -0
- package/.scripts/notion_helpers.py +325 -0
- package/.scripts/openalex/query_helpers.py +306 -0
- package/.scripts/papers +227 -0
- package/.scripts/query +223 -0
- package/.scripts/session-history.py +201 -0
- package/.scripts/skill-health.py +516 -0
- package/.scripts/skill-log-miner.py +273 -0
- package/.scripts/sync-to-codex.sh +252 -0
- package/.scripts/task +213 -0
- package/.scripts/tasks +190 -0
- package/.scripts/week +206 -0
- package/CLAUDE.md +197 -0
- package/LICENSE +21 -0
- package/MEMORY.md +38 -0
- package/README.md +269 -0
- package/docs/agents.md +44 -0
- package/docs/bibliography-setup.md +55 -0
- package/docs/council-mode.md +36 -0
- package/docs/getting-started.md +245 -0
- package/docs/hooks.md +38 -0
- package/docs/mcp-servers.md +82 -0
- package/docs/notion-setup.md +109 -0
- package/docs/rules.md +33 -0
- package/docs/scripts.md +303 -0
- package/docs/setup-overview/setup-overview.pdf +0 -0
- package/docs/skills.md +70 -0
- package/docs/system.md +159 -0
- package/hooks/block-destructive-git.sh +66 -0
- package/hooks/context-monitor.py +114 -0
- package/hooks/postcompact-restore.py +157 -0
- package/hooks/precompact-autosave.py +181 -0
- package/hooks/promise-checker.sh +124 -0
- package/hooks/protect-source-files.sh +81 -0
- package/hooks/resume-context-loader.sh +53 -0
- package/hooks/startup-context-loader.sh +102 -0
- package/package.json +51 -0
- package/packages/cli-council/.github/workflows/claude-code-review.yml +44 -0
- package/packages/cli-council/.github/workflows/claude.yml +50 -0
- package/packages/cli-council/README.md +100 -0
- package/packages/cli-council/pyproject.toml +43 -0
- package/packages/cli-council/src/cli_council/__init__.py +19 -0
- package/packages/cli-council/src/cli_council/__main__.py +185 -0
- package/packages/cli-council/src/cli_council/backends/__init__.py +8 -0
- package/packages/cli-council/src/cli_council/backends/base.py +81 -0
- package/packages/cli-council/src/cli_council/backends/claude.py +25 -0
- package/packages/cli-council/src/cli_council/backends/codex.py +27 -0
- package/packages/cli-council/src/cli_council/backends/gemini.py +26 -0
- package/packages/cli-council/src/cli_council/checkpoint.py +212 -0
- package/packages/cli-council/src/cli_council/config.py +51 -0
- package/packages/cli-council/src/cli_council/council.py +391 -0
- package/packages/cli-council/src/cli_council/models.py +46 -0
- package/packages/llm-council/.github/workflows/claude-code-review.yml +44 -0
- package/packages/llm-council/.github/workflows/claude.yml +50 -0
- package/packages/llm-council/README.md +453 -0
- package/packages/llm-council/pyproject.toml +42 -0
- package/packages/llm-council/src/llm_council/__init__.py +23 -0
- package/packages/llm-council/src/llm_council/__main__.py +259 -0
- package/packages/llm-council/src/llm_council/checkpoint.py +193 -0
- package/packages/llm-council/src/llm_council/client.py +253 -0
- package/packages/llm-council/src/llm_council/config.py +232 -0
- package/packages/llm-council/src/llm_council/council.py +482 -0
- package/packages/llm-council/src/llm_council/models.py +46 -0
- package/packages/mcp-bibliography/MEMORY.md +31 -0
- package/packages/mcp-bibliography/_app.py +226 -0
- package/packages/mcp-bibliography/formatters.py +158 -0
- package/packages/mcp-bibliography/log/2026-03-13-2100.md +35 -0
- package/packages/mcp-bibliography/pyproject.toml +15 -0
- package/packages/mcp-bibliography/run.sh +20 -0
- package/packages/mcp-bibliography/scholarly_formatters.py +83 -0
- package/packages/mcp-bibliography/server.py +1857 -0
- package/packages/mcp-bibliography/tools/__init__.py +28 -0
- package/packages/mcp-bibliography/tools/_registry.py +19 -0
- package/packages/mcp-bibliography/tools/altmetric.py +107 -0
- package/packages/mcp-bibliography/tools/core.py +92 -0
- package/packages/mcp-bibliography/tools/dblp.py +52 -0
- package/packages/mcp-bibliography/tools/openalex.py +296 -0
- package/packages/mcp-bibliography/tools/opencitations.py +102 -0
- package/packages/mcp-bibliography/tools/openreview.py +179 -0
- package/packages/mcp-bibliography/tools/orcid.py +131 -0
- package/packages/mcp-bibliography/tools/scholarly.py +575 -0
- package/packages/mcp-bibliography/tools/unpaywall.py +63 -0
- package/packages/mcp-bibliography/tools/zenodo.py +123 -0
- package/packages/mcp-bibliography/uv.lock +711 -0
- package/scripts/setup.sh +143 -0
- package/skills/beamer-deck/SKILL.md +199 -0
- package/skills/beamer-deck/references/quality-rubric.md +54 -0
- package/skills/beamer-deck/references/review-prompts.md +106 -0
- package/skills/bib-validate/SKILL.md +261 -0
- package/skills/bib-validate/references/council-mode.md +34 -0
- package/skills/bib-validate/references/deep-verify.md +79 -0
- package/skills/bib-validate/references/fix-mode.md +36 -0
- package/skills/bib-validate/references/openalex-verification.md +45 -0
- package/skills/bib-validate/references/preprint-check.md +31 -0
- package/skills/bib-validate/references/ref-manager-crossref.md +41 -0
- package/skills/bib-validate/references/report-template.md +82 -0
- package/skills/code-archaeology/SKILL.md +141 -0
- package/skills/code-review/SKILL.md +265 -0
- package/skills/code-review/references/quality-rubric.md +67 -0
- package/skills/consolidate-memory/SKILL.md +208 -0
- package/skills/context-status/SKILL.md +126 -0
- package/skills/creation-guard/SKILL.md +230 -0
- package/skills/devils-advocate/SKILL.md +130 -0
- package/skills/devils-advocate/references/competing-hypotheses.md +83 -0
- package/skills/init-project/SKILL.md +115 -0
- package/skills/init-project-course/references/memory-and-settings.md +92 -0
- package/skills/init-project-course/references/organise-templates.md +94 -0
- package/skills/init-project-course/skill.md +147 -0
- package/skills/init-project-light/skill.md +139 -0
- package/skills/init-project-research/SKILL.md +368 -0
- package/skills/init-project-research/references/atlas-pipeline-sync.md +70 -0
- package/skills/init-project-research/references/atlas-schema.md +81 -0
- package/skills/init-project-research/references/confirmation-report.md +39 -0
- package/skills/init-project-research/references/domain-profile-template.md +104 -0
- package/skills/init-project-research/references/interview-round3.md +34 -0
- package/skills/init-project-research/references/literature-discovery.md +43 -0
- package/skills/init-project-research/references/scaffold-details.md +197 -0
- package/skills/init-project-research/templates/field-calibration.md +60 -0
- package/skills/init-project-research/templates/pipeline-manifest.md +63 -0
- package/skills/init-project-research/templates/run-all.sh +116 -0
- package/skills/init-project-research/templates/seed-files.md +337 -0
- package/skills/insights-deck/SKILL.md +151 -0
- package/skills/interview-me/SKILL.md +157 -0
- package/skills/latex/SKILL.md +141 -0
- package/skills/latex/references/latex-configs.md +183 -0
- package/skills/latex-autofix/SKILL.md +230 -0
- package/skills/latex-autofix/references/known-errors.md +183 -0
- package/skills/latex-autofix/references/quality-rubric.md +50 -0
- package/skills/latex-health-check/SKILL.md +161 -0
- package/skills/learn/SKILL.md +220 -0
- package/skills/learn/scripts/validate_skill.py +265 -0
- package/skills/lessons-learned/SKILL.md +201 -0
- package/skills/literature/SKILL.md +335 -0
- package/skills/literature/references/agent-templates.md +393 -0
- package/skills/literature/references/bibliometric-apis.md +44 -0
- package/skills/literature/references/cli-council-search.md +79 -0
- package/skills/literature/references/openalex-api-guide.md +371 -0
- package/skills/literature/references/openalex-common-queries.md +381 -0
- package/skills/literature/references/openalex-workflows.md +248 -0
- package/skills/literature/references/reference-manager-sync.md +36 -0
- package/skills/literature/references/scopus-api-guide.md +208 -0
- package/skills/literature/references/wos-api-guide.md +308 -0
- package/skills/multi-perspective/SKILL.md +311 -0
- package/skills/multi-perspective/references/computational-many-analysts.md +77 -0
- package/skills/pipeline-manifest/SKILL.md +226 -0
- package/skills/pre-submission-report/SKILL.md +153 -0
- package/skills/process-reviews/SKILL.md +244 -0
- package/skills/process-reviews/references/rr-routing.md +101 -0
- package/skills/project-deck/SKILL.md +87 -0
- package/skills/project-safety/SKILL.md +135 -0
- package/skills/proofread/SKILL.md +254 -0
- package/skills/proofread/references/quality-rubric.md +104 -0
- package/skills/python-env/SKILL.md +57 -0
- package/skills/quarto-deck/SKILL.md +226 -0
- package/skills/quarto-deck/references/markdown-format.md +143 -0
- package/skills/quarto-deck/references/quality-rubric.md +54 -0
- package/skills/save-context/SKILL.md +174 -0
- package/skills/session-log/SKILL.md +98 -0
- package/skills/shared/concept-validation-gate.md +161 -0
- package/skills/shared/council-protocol.md +265 -0
- package/skills/shared/distribution-diagnostics.md +164 -0
- package/skills/shared/engagement-stratified-sampling.md +218 -0
- package/skills/shared/escalation-protocol.md +74 -0
- package/skills/shared/external-audit-protocol.md +205 -0
- package/skills/shared/intercoder-reliability.md +256 -0
- package/skills/shared/mcp-degradation.md +81 -0
- package/skills/shared/method-probing-questions.md +163 -0
- package/skills/shared/multi-language-conventions.md +143 -0
- package/skills/shared/paid-api-safety.md +174 -0
- package/skills/shared/palettes.md +90 -0
- package/skills/shared/progressive-disclosure.md +92 -0
- package/skills/shared/project-documentation-content.md +443 -0
- package/skills/shared/project-documentation-format.md +281 -0
- package/skills/shared/project-documentation.md +100 -0
- package/skills/shared/publication-output.md +138 -0
- package/skills/shared/quality-scoring.md +70 -0
- package/skills/shared/reference-resolution.md +77 -0
- package/skills/shared/research-quality-rubric.md +165 -0
- package/skills/shared/rhetoric-principles.md +54 -0
- package/skills/shared/skill-design-patterns.md +272 -0
- package/skills/shared/skill-index.md +240 -0
- package/skills/shared/system-documentation.md +334 -0
- package/skills/shared/tikz-rules.md +402 -0
- package/skills/shared/validation-tiers.md +121 -0
- package/skills/shared/venue-guides/README.md +46 -0
- package/skills/shared/venue-guides/cell_press_style.md +483 -0
- package/skills/shared/venue-guides/conferences_formatting.md +564 -0
- package/skills/shared/venue-guides/cs_conference_style.md +463 -0
- package/skills/shared/venue-guides/examples/cell_summary_example.md +247 -0
- package/skills/shared/venue-guides/examples/medical_structured_abstract.md +313 -0
- package/skills/shared/venue-guides/examples/nature_abstract_examples.md +213 -0
- package/skills/shared/venue-guides/examples/neurips_introduction_example.md +245 -0
- package/skills/shared/venue-guides/journals_formatting.md +486 -0
- package/skills/shared/venue-guides/medical_journal_styles.md +535 -0
- package/skills/shared/venue-guides/ml_conference_style.md +556 -0
- package/skills/shared/venue-guides/nature_science_style.md +405 -0
- package/skills/shared/venue-guides/reviewer_expectations.md +417 -0
- package/skills/shared/venue-guides/venue_writing_styles.md +321 -0
- package/skills/split-pdf/SKILL.md +172 -0
- package/skills/split-pdf/methodology.md +48 -0
- package/skills/sync-notion/SKILL.md +93 -0
- package/skills/system-audit/SKILL.md +157 -0
- package/skills/system-audit/references/sub-agent-prompts.md +294 -0
- package/skills/task-management/SKILL.md +131 -0
- package/skills/update-focus/SKILL.md +204 -0
- package/skills/update-project-doc/SKILL.md +194 -0
- package/skills/validate-bib/SKILL.md +242 -0
- package/skills/validate-bib/references/council-mode.md +34 -0
- package/skills/validate-bib/references/deep-verify.md +71 -0
- package/skills/validate-bib/references/openalex-verification.md +45 -0
- package/skills/validate-bib/references/preprint-check.md +31 -0
- package/skills/validate-bib/references/report-template.md +62 -0
|
@@ -0,0 +1,453 @@
|
|
|
1
|
+
# LLM Council
|
|
2
|
+
|
|
3
|
+
A Python package for multi-model LLM deliberation via OpenRouter. It orchestrates independent assessments from multiple AI models, conducts anonymous peer review among them, and synthesises a consensus through a chairman model.
|
|
4
|
+
|
|
5
|
+
## The 3-Stage Protocol
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
Stage 1: Individual Assessments (parallel)
|
|
9
|
+
┌─────────┐ ┌─────────┐ ┌─────────┐
|
|
10
|
+
│ Claude │ │ GPT-5 │ │ Gemini │
|
|
11
|
+
│ Sonnet │ │ │ │ 2.5 Pro │
|
|
12
|
+
└────┬─────┘ └────┬────┘ └────┬────┘
|
|
13
|
+
│ │ │
|
|
14
|
+
▼ ▼ ▼
|
|
15
|
+
Result A Result B Result C
|
|
16
|
+
(JSON) (JSON) (JSON)
|
|
17
|
+
|
|
18
|
+
Stage 2: Peer Review (parallel)
|
|
19
|
+
Each model reviews ALL assessments anonymously
|
|
20
|
+
┌────────────────────────────────────────┐
|
|
21
|
+
│ "Assessment A is comprehensive but │
|
|
22
|
+
│ misses X. Assessment C handles Y │
|
|
23
|
+
│ better than B..." │
|
|
24
|
+
│ │
|
|
25
|
+
│ FINAL RANKING: │
|
|
26
|
+
│ 1. Assessment C │
|
|
27
|
+
│ 2. Assessment A │
|
|
28
|
+
│ 3. Assessment B │
|
|
29
|
+
└────────────────────────────────────────┘
|
|
30
|
+
Rankings are aggregated across all reviewers
|
|
31
|
+
|
|
32
|
+
Stage 3: Chairman Synthesis (single model)
|
|
33
|
+
┌────────────────────────────────────────┐
|
|
34
|
+
│ Chairman reviews all assessments + │
|
|
35
|
+
│ all peer reviews, then produces a │
|
|
36
|
+
│ single synthesised result in the │
|
|
37
|
+
│ same JSON schema as Stage 1 │
|
|
38
|
+
└────────────────────────────────────────┘
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
**Why this works:**
|
|
42
|
+
- Multiple models catch each other's blind spots
|
|
43
|
+
- Anonymous peer review prevents model-name bias
|
|
44
|
+
- Aggregate rankings surface the best reasoning regardless of source
|
|
45
|
+
- Chairman synthesis resolves disagreements using the full deliberation record
|
|
46
|
+
|
|
47
|
+
## Installation
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install "llm-council @ git+https://github.com/user/llm-council.git"
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Requires Python 3.11+.
|
|
54
|
+
|
|
55
|
+
## Quick Start
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
import asyncio
|
|
59
|
+
from llm_council import LLMClient, CouncilService
|
|
60
|
+
|
|
61
|
+
async def main():
|
|
62
|
+
client = LLMClient(
|
|
63
|
+
api_key="sk-or-...", # OpenRouter API key
|
|
64
|
+
model="anthropic/claude-sonnet-4.5",
|
|
65
|
+
)
|
|
66
|
+
council = CouncilService(llm=client)
|
|
67
|
+
|
|
68
|
+
result = await council.run_council(
|
|
69
|
+
system_prompt="You are a research paper reviewer. Return JSON with: overall_score (1-10), strengths (list), weaknesses (list), recommendation (string).",
|
|
70
|
+
user_msg="Review this paper abstract: ...",
|
|
71
|
+
council_models=[
|
|
72
|
+
"anthropic/claude-sonnet-4.5",
|
|
73
|
+
"openai/gpt-5",
|
|
74
|
+
"google/gemini-2.5-pro",
|
|
75
|
+
],
|
|
76
|
+
chairman_model="anthropic/claude-sonnet-4.5",
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
print(f"Consensus score: {result.final_result['overall_score']}")
|
|
80
|
+
print(f"Council ranked: {[r['model_name'] for r in result.meta.aggregate_rankings]}")
|
|
81
|
+
print(f"Total time: {result.meta.total_ms}ms")
|
|
82
|
+
|
|
83
|
+
await client.close()
|
|
84
|
+
|
|
85
|
+
asyncio.run(main())
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## API Reference
|
|
89
|
+
|
|
90
|
+
### LLMClient
|
|
91
|
+
|
|
92
|
+
Generic async LLM client for OpenRouter.
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
client = LLMClient(
|
|
96
|
+
api_key: str, # OpenRouter API key
|
|
97
|
+
model: str = "anthropic/claude-sonnet-4.5", # Default model
|
|
98
|
+
max_tokens: int = 4096, # Max completion tokens
|
|
99
|
+
json_retry_attempts: int = 2, # Retries on JSON parse failure
|
|
100
|
+
)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
**Methods:**
|
|
104
|
+
|
|
105
|
+
| Method | Returns | Description |
|
|
106
|
+
|--------|---------|-------------|
|
|
107
|
+
| `chat_json(system, user_msg, *, model=None)` | `dict` | Send message, parse JSON response (with retries) |
|
|
108
|
+
| `chat_text(system, user_msg, *, model=None)` | `str` | Send message, return raw text |
|
|
109
|
+
| `close()` | `None` | Close the async HTTP client |
|
|
110
|
+
|
|
111
|
+
**JSON parsing** is robust — it tries three extraction strategies in order:
|
|
112
|
+
1. Parse the full response as JSON
|
|
113
|
+
2. Extract from markdown code fences (`` ```json ... ``` ``)
|
|
114
|
+
3. Extract the first `{...}` block
|
|
115
|
+
|
|
116
|
+
If all three strategies still fail after the configured retries (`json_retry_attempts`), `LLMResponseFormatError` is raised.
|
|
117
|
+
|
|
118
|
+
**Error handling** converts OpenRouter/OpenAI SDK errors into `LLMServiceError` with user-friendly messages:
|
|
119
|
+
|
|
120
|
+
| HTTP Status | Meaning | `LLMServiceError` message |
|
|
121
|
+
|-------------|---------|--------------------------|
|
|
122
|
+
| 401 | Bad API key | "Authentication failed" |
|
|
123
|
+
| 402 | No credits | "Insufficient credits" + help URL |
|
|
124
|
+
| 429 | Rate limited | "Rate limited — try again" |
|
|
125
|
+
| 503 | Model unavailable | "Model temporarily unavailable" |
|
|
126
|
+
|
|
127
|
+
### CouncilService
|
|
128
|
+
|
|
129
|
+
Orchestrates the 3-stage deliberation protocol.
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
council = CouncilService(llm: LLMClient)
|
|
133
|
+
|
|
134
|
+
result = await council.run_council(
|
|
135
|
+
system_prompt: str, # System prompt for Stage 1 assessments
|
|
136
|
+
user_msg: str, # User message for Stage 1
|
|
137
|
+
council_models: list[str], # OpenRouter model IDs (3 or more recommended)
|
|
138
|
+
chairman_model: str, # Model for Stage 3 synthesis
|
|
139
|
+
*,
|
|
140
|
+
# Optional:
|
|
141
|
+
existing_result: dict | None = None, # Reuse a prior result as one assessment
|
|
142
|
+
existing_model: str | None = None, # Which model produced existing_result
|
|
143
|
+
stage2_system: str | None = None, # Custom peer review prompt
|
|
144
|
+
stage3_prompt_builder: Callable | None = None, # Custom chairman prompt builder
|
|
145
|
+
)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
**Parameters:**
|
|
149
|
+
|
|
150
|
+
| Parameter | Required | Description |
|
|
151
|
+
|-----------|----------|-------------|
|
|
152
|
+
| `system_prompt` | Yes | Defines the task and expected JSON output schema |
|
|
153
|
+
| `user_msg` | Yes | The content to evaluate (abstract, topic, code, etc.) |
|
|
154
|
+
| `council_models` | Yes | List of OpenRouter model IDs (minimum 3 recommended) |
|
|
155
|
+
| `chairman_model` | Yes | Model ID for final synthesis |
|
|
156
|
+
| `existing_result` | No | Skip Stage 1 for one model by reusing a prior JSON result |
|
|
157
|
+
| `existing_model` | No | Model ID that produced `existing_result` |
|
|
158
|
+
| `stage2_system` | No | Override the default peer review system prompt |
|
|
159
|
+
| `stage3_prompt_builder` | No | Callable `(assessments, peer_reviews, user_msg) -> str` |
|
|
160
|
+
|
|
161
|
+
**Returns:** `CouncilResult` (see [Data Models](#data-models)).
|
|
162
|
+
|
|
163
|
+
### Data Models
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
from llm_council import (
|
|
167
|
+
CouncilResult,
|
|
168
|
+
CouncilAssessment,
|
|
169
|
+
CouncilPeerReview,
|
|
170
|
+
CouncilMeta,
|
|
171
|
+
)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
#### CouncilResult
|
|
175
|
+
|
|
176
|
+
The complete output of a council deliberation.
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
class CouncilResult(BaseModel):
|
|
180
|
+
final_result: dict # Synthesised consensus (same schema as Stage 1)
|
|
181
|
+
assessments: list[CouncilAssessment] # All Stage 1 responses
|
|
182
|
+
peer_reviews: list[CouncilPeerReview] # All Stage 2 reviews
|
|
183
|
+
meta: CouncilMeta # Timing, rankings, diagnostics
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
#### CouncilAssessment
|
|
187
|
+
|
|
188
|
+
A single model's Stage 1 response.
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
class CouncilAssessment(BaseModel):
|
|
192
|
+
model: str # OpenRouter model ID (e.g., "anthropic/claude-sonnet-4.5")
|
|
193
|
+
model_name: str # Human-readable name (e.g., "Claude Sonnet 4.5")
|
|
194
|
+
result_json: dict # The structured JSON response
|
|
195
|
+
label: str = "" # Anonymised label ("Assessment A", "Assessment B", etc.)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
#### CouncilPeerReview
|
|
199
|
+
|
|
200
|
+
A single model's Stage 2 peer review.
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
class CouncilPeerReview(BaseModel):
|
|
204
|
+
model: str # Reviewer's model ID
|
|
205
|
+
model_name: str # Human-readable name
|
|
206
|
+
review_text: str # Free-form evaluation text
|
|
207
|
+
parsed_ranking: list[str] = Field(default_factory=list) # ["Assessment C", "Assessment A", ...]
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
#### CouncilMeta
|
|
211
|
+
|
|
212
|
+
Metadata and diagnostics.
|
|
213
|
+
|
|
214
|
+
```python
|
|
215
|
+
class CouncilMeta(BaseModel):
|
|
216
|
+
council_models: list[str] # All participating model IDs
|
|
217
|
+
chairman_model: str # Chairman model ID
|
|
218
|
+
stage1_ms: int = 0 # Stage 1 wall-clock time
|
|
219
|
+
stage2_ms: int = 0 # Stage 2 wall-clock time
|
|
220
|
+
stage3_ms: int = 0 # Stage 3 wall-clock time
|
|
221
|
+
total_ms: int = 0 # Total wall-clock time
|
|
222
|
+
reused_model: str | None = None # Model whose result was reused (if any)
|
|
223
|
+
aggregate_rankings: list[dict] = Field(...) # Sorted by average_rank
|
|
224
|
+
stage3_fallback: bool = False # True if the chairman failed and the top-ranked assessment was used instead
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
**`aggregate_rankings`** — computed from all peer reviews:
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
[
|
|
231
|
+
{"label": "Assessment C", "model": "google/gemini-2.5-pro", "model_name": "Gemini 2.5 Pro", "average_rank": 1.0, "rankings_count": 3},
|
|
232
|
+
{"label": "Assessment A", "model": "anthropic/claude-sonnet-4.5", "model_name": "Claude Sonnet 4.5", "average_rank": 2.0, "rankings_count": 3},
|
|
233
|
+
{"label": "Assessment B", "model": "openai/gpt-5", "model_name": "GPT-5", "average_rank": 3.0, "rankings_count": 3},
|
|
234
|
+
]
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## Configuration Module
|
|
238
|
+
|
|
239
|
+
The `config` module provides model registry management, user defaults, and pricing.
|
|
240
|
+
|
|
241
|
+
```python
|
|
242
|
+
from llm_council.config import (
|
|
243
|
+
AVAILABLE_MODELS, # Default model list (17 models)
|
|
244
|
+
ALLOWED_PROVIDERS, # {"anthropic", "openai", "google"}
|
|
245
|
+
COUNCIL_DEFAULT_MODELS, # Default 3 council members (resolves user config)
|
|
246
|
+
COUNCIL_DEFAULT_CHAIRMAN, # Default chairman model (resolves user config)
|
|
247
|
+
model_display_name, # model_id -> human name
|
|
248
|
+
get_council_defaults, # Get council models (user config > built-in)
|
|
249
|
+
get_chairman_default, # Get chairman model (user config > built-in)
|
|
250
|
+
set_council_defaults, # Persist council defaults to ~/.config/llm-council/
|
|
251
|
+
reset_council_defaults, # Revert to built-in defaults
|
|
252
|
+
fetch_model_pricing, # Enrich models with live OpenRouter pricing
|
|
253
|
+
fetch_all_provider_models, # Discover all models from allowed providers
|
|
254
|
+
load_models, # Load saved model list from JSON file
|
|
255
|
+
save_models, # Persist model list to JSON file
|
|
256
|
+
)
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
### User Defaults
|
|
260
|
+
|
|
261
|
+
Council defaults are resolved in order of precedence: **user config** first, then **built-in defaults** when no user config exists.
|
|
262
|
+
|
|
263
|
+
```python
|
|
264
|
+
# Get current defaults (resolves user config automatically)
|
|
265
|
+
models = get_council_defaults() # e.g., ["anthropic/claude-sonnet-4.6", ...]
|
|
266
|
+
chairman = get_chairman_default() # e.g., "anthropic/claude-opus-4.5"
|
|
267
|
+
|
|
268
|
+
# Set and persist defaults
|
|
269
|
+
set_council_defaults(
|
|
270
|
+
models=["anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-pro-preview"],
|
|
271
|
+
chairman="anthropic/claude-opus-4.5",
|
|
272
|
+
)
|
|
273
|
+
# Saved to ~/.config/llm-council/config.json
|
|
274
|
+
|
|
275
|
+
# Revert to built-in defaults
|
|
276
|
+
reset_council_defaults()
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
**Built-in defaults** (used when no user config exists):
|
|
280
|
+
- Models: `anthropic/claude-sonnet-4.5`, `openai/gpt-5`, `google/gemini-2.5-pro`
|
|
281
|
+
- Chairman: `anthropic/claude-sonnet-4.5`
|
|
282
|
+
|
|
283
|
+
### Available Models (17 total)
|
|
284
|
+
|
|
285
|
+
| Provider | Models |
|
|
286
|
+
|----------|--------|
|
|
287
|
+
| **Anthropic** (5) | Claude Haiku 4.5, Sonnet 4.5, Sonnet 4.6, Opus 4.5, Opus 4.6 |
|
|
288
|
+
| **OpenAI** (7) | GPT-4.1 Mini, GPT-4.1, GPT-5 Mini, GPT-5, GPT-5.2, o3, o4 Mini |
|
|
289
|
+
| **Google** (5) | Gemini 2.5 Flash, 2.5 Pro, 3 Flash, 3 Pro, 3.1 Pro |
|
|
290
|
+
|
|
291
|
+
### Live Pricing
|
|
292
|
+
|
|
293
|
+
```python
|
|
294
|
+
# Enrich the default model list with live pricing from OpenRouter
|
|
295
|
+
models = await fetch_model_pricing()
|
|
296
|
+
# Returns: [{"id": "...", "name": "...", "tier": "...", "input_price": "$3.00", "output_price": "$15.00", "provider": "anthropic"}, ...]
|
|
297
|
+
|
|
298
|
+
# Or discover ALL available models from allowed providers
|
|
299
|
+
all_models = await fetch_all_provider_models()
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
### Model Persistence
|
|
303
|
+
|
|
304
|
+
```python
|
|
305
|
+
# Save the current model selection to disk
|
|
306
|
+
save_models("models.json", models)
|
|
307
|
+
|
|
308
|
+
# Load it back (returns None if the file is missing)
|
|
309
|
+
loaded = load_models("models.json")
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
## Advanced Usage
|
|
313
|
+
|
|
314
|
+
### Reusing a Prior Result
|
|
315
|
+
|
|
316
|
+
If you already have a result from a single model and want to run a council review around it:
|
|
317
|
+
|
|
318
|
+
```python
|
|
319
|
+
# First, run a single-model analysis
|
|
320
|
+
single_result = await client.chat_json(system_prompt, user_msg)
|
|
321
|
+
|
|
322
|
+
# Then, run a council that reuses this result as one assessment
|
|
323
|
+
council_result = await council.run_council(
|
|
324
|
+
system_prompt=system_prompt,
|
|
325
|
+
user_msg=user_msg,
|
|
326
|
+
council_models=["anthropic/claude-sonnet-4.5", "openai/gpt-5", "google/gemini-2.5-pro"],
|
|
327
|
+
chairman_model="anthropic/claude-sonnet-4.5",
|
|
328
|
+
existing_result=single_result,
|
|
329
|
+
existing_model="anthropic/claude-sonnet-4.5",
|
|
330
|
+
)
|
|
331
|
+
# Stage 1 runs only for gpt-5 and gemini — claude's result is reused
|
|
332
|
+
# meta.reused_model == "anthropic/claude-sonnet-4.5"
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
### Custom Peer Review Prompt
|
|
336
|
+
|
|
337
|
+
```python
|
|
338
|
+
result = await council.run_council(
|
|
339
|
+
...,
|
|
340
|
+
stage2_system="You are a domain expert reviewing research assessments. Focus on methodological rigour and citation accuracy. End with FINAL RANKING.",
|
|
341
|
+
)
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
### Custom Chairman Prompt Builder
|
|
345
|
+
|
|
346
|
+
```python
|
|
347
|
+
def my_chairman_prompt(assessments, peer_reviews, user_msg):
|
|
348
|
+
return f"""
|
|
349
|
+
Given these {len(assessments)} assessments and {len(peer_reviews)} peer reviews,
|
|
350
|
+
synthesise a consensus. Prioritise methodological soundness.
|
|
351
|
+
Original question: {user_msg}
|
|
352
|
+
...
|
|
353
|
+
"""
|
|
354
|
+
|
|
355
|
+
result = await council.run_council(
|
|
356
|
+
...,
|
|
357
|
+
stage3_prompt_builder=my_chairman_prompt,
|
|
358
|
+
)
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
### Fallback Handling
|
|
362
|
+
|
|
363
|
+
If the chairman model fails (network error, malformed response), the council falls back to the top-ranked assessment from Stage 2:
|
|
364
|
+
|
|
365
|
+
```python
|
|
366
|
+
if result.meta.stage3_fallback:
|
|
367
|
+
print("Warning: Chairman synthesis failed — using top-ranked assessment")
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
## CLI
|
|
371
|
+
|
|
372
|
+
### Run a Council
|
|
373
|
+
|
|
374
|
+
```bash
|
|
375
|
+
llm-council \
|
|
376
|
+
--system-prompt "You are a reviewer. Return JSON: {score: int, summary: str}" \
|
|
377
|
+
--user-message "Review: ..." \
|
|
378
|
+
--models "anthropic/claude-sonnet-4.5,openai/gpt-5,google/gemini-2.5-pro" \
|
|
379
|
+
--chairman "anthropic/claude-sonnet-4.5" \
|
|
380
|
+
--output result.json
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
Or from files:
|
|
384
|
+
|
|
385
|
+
```bash
|
|
386
|
+
llm-council \
|
|
387
|
+
--system-prompt-file system.txt \
|
|
388
|
+
--user-message-file user.txt
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
When `--models` and `--chairman` are omitted, the CLI uses your configured defaults (see "Manage Models" below).
|
|
392
|
+
|
|
393
|
+
**Environment:** Requires `OPENROUTER_API_KEY`.
|
|
394
|
+
|
|
395
|
+
### Manage Models
|
|
396
|
+
|
|
397
|
+
```bash
|
|
398
|
+
# List available models and current defaults
|
|
399
|
+
llm-council models
|
|
400
|
+
|
|
401
|
+
# Include live OpenRouter pricing
|
|
402
|
+
llm-council models --pricing
|
|
403
|
+
|
|
404
|
+
# Set default council models
|
|
405
|
+
llm-council models --set-defaults "anthropic/claude-sonnet-4.6,openai/gpt-5,google/gemini-3-pro-preview"
|
|
406
|
+
|
|
407
|
+
# Set default chairman
|
|
408
|
+
llm-council models --set-chairman "anthropic/claude-opus-4.5"
|
|
409
|
+
|
|
410
|
+
# Reset to built-in defaults
|
|
411
|
+
llm-council models --reset
|
|
412
|
+
```
|
|
413
|
+
|
|
414
|
+
Defaults are persisted to `~/.config/llm-council/config.json` and used automatically when `--models`/`--chairman` are omitted.
|
|
415
|
+
|
|
416
|
+
## Dependencies
|
|
417
|
+
|
|
418
|
+
| Package | Purpose |
|
|
419
|
+
|---------|---------|
|
|
420
|
+
| `httpx>=0.27` | Async HTTP for OpenRouter pricing API |
|
|
421
|
+
| `openai>=1.0` | OpenAI SDK (used as OpenRouter client) |
|
|
422
|
+
| `pydantic>=2.0` | Data models and validation |
|
|
423
|
+
|
|
424
|
+
## Package Structure
|
|
425
|
+
|
|
426
|
+
```
|
|
427
|
+
llm_council/
|
|
428
|
+
├── __init__.py # Public API exports
|
|
429
|
+
├── __main__.py # CLI entry point
|
|
430
|
+
├── client.py # LLMClient + error classes
|
|
431
|
+
├── models.py # Pydantic models (CouncilResult, etc.)
|
|
432
|
+
├── config.py # Model registry, pricing, defaults
├── checkpoint.py # CouncilCheckpointer
|
|
433
|
+
└── council.py # CouncilService (3-stage orchestration)
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
## Design Decisions
|
|
437
|
+
|
|
438
|
+
- **OpenRouter, not direct APIs** — single API key accesses Anthropic, OpenAI, and Google models. No need for 3 separate accounts.
|
|
439
|
+
- **Schema-agnostic** — the council doesn't know what JSON schema you're using. It passes through whatever Stage 1 returns. Your application defines the schema via the system prompt.
|
|
440
|
+
- **Anonymous peer review** — assessments are labelled "Assessment A/B/C" during Stage 2. Model identities are revealed only in metadata.
|
|
441
|
+
- **Parallel execution** — Stage 1 and Stage 2 queries run concurrently via `asyncio.gather`. Each stage's wall-clock time is bounded by its slowest model, not the sum of all calls.
|
|
442
|
+
- **Graceful degradation** — if a Stage 1 model fails, the council continues with the remaining assessments. If the chairman fails, it falls back to the top-ranked assessment.
|
|
443
|
+
|
|
444
|
+
## Cost Estimate
|
|
445
|
+
|
|
446
|
+
A council run with 3 models costs approximately **6-7x** a single-model call:
|
|
447
|
+
- Stage 1: 3 parallel calls (3x)
|
|
448
|
+
- Stage 2: 3 parallel reviews, each reviewing all assessments (3x, but shorter prompts)
|
|
449
|
+
- Stage 3: 1 chairman synthesis (1x)
|
|
450
|
+
|
|
451
|
+
## Related
|
|
452
|
+
|
|
453
|
+
- Used by downstream applications for multi-model council deliberation
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "llm-council"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Multi-model LLM council: independent assessments, anonymised peer review, chairman synthesis. Via OpenRouter."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "the user" },
|
|
10
|
+
]
|
|
11
|
+
keywords = ["llm", "council", "multi-model", "openrouter", "peer-review", "ai"]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 3 - Alpha",
|
|
14
|
+
"Intended Audience :: Developers",
|
|
15
|
+
"Intended Audience :: Science/Research",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Programming Language :: Python :: 3.13",
|
|
21
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
22
|
+
]
|
|
23
|
+
dependencies = [
|
|
24
|
+
"openai>=1.0",
|
|
25
|
+
"pydantic>=2.0",
|
|
26
|
+
"httpx>=0.27",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.urls]
|
|
30
|
+
Homepage = "https://github.com/user/llm-council"
|
|
31
|
+
Repository = "https://github.com/user/llm-council"
|
|
32
|
+
Issues = "https://github.com/user/llm-council/issues"
|
|
33
|
+
|
|
34
|
+
[project.scripts]
|
|
35
|
+
llm-council = "llm_council.__main__:main"
|
|
36
|
+
|
|
37
|
+
[build-system]
|
|
38
|
+
requires = ["hatchling"]
|
|
39
|
+
build-backend = "hatchling.build"
|
|
40
|
+
|
|
41
|
+
[tool.hatch.build.targets.wheel]
|
|
42
|
+
packages = ["src/llm_council"]
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""llm-council: Multi-model LLM council via OpenRouter."""
|
|
2
|
+
|
|
3
|
+
from llm_council.checkpoint import CouncilCheckpointer
|
|
4
|
+
from llm_council.client import LLMClient, LLMResponseFormatError, LLMServiceError
|
|
5
|
+
from llm_council.council import CouncilService
|
|
6
|
+
from llm_council.models import (
|
|
7
|
+
CouncilAssessment,
|
|
8
|
+
CouncilMeta,
|
|
9
|
+
CouncilPeerReview,
|
|
10
|
+
CouncilResult,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"CouncilCheckpointer",
|
|
15
|
+
"LLMClient",
|
|
16
|
+
"LLMResponseFormatError",
|
|
17
|
+
"LLMServiceError",
|
|
18
|
+
"CouncilService",
|
|
19
|
+
"CouncilAssessment",
|
|
20
|
+
"CouncilMeta",
|
|
21
|
+
"CouncilPeerReview",
|
|
22
|
+
"CouncilResult",
|
|
23
|
+
]
|