flonat-research 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285) hide show
  1. package/.claude/agents/domain-reviewer.md +336 -0
  2. package/.claude/agents/fixer.md +226 -0
  3. package/.claude/agents/paper-critic.md +370 -0
  4. package/.claude/agents/peer-reviewer.md +289 -0
  5. package/.claude/agents/proposal-reviewer.md +215 -0
  6. package/.claude/agents/referee2-reviewer.md +367 -0
  7. package/.claude/agents/references/journal-referee-profiles.md +354 -0
  8. package/.claude/agents/references/paper-critic/council-personas.md +77 -0
  9. package/.claude/agents/references/paper-critic/council-prompts.md +198 -0
  10. package/.claude/agents/references/peer-reviewer/report-template.md +199 -0
  11. package/.claude/agents/references/peer-reviewer/sa-prompts.md +260 -0
  12. package/.claude/agents/references/peer-reviewer/security-scan.md +188 -0
  13. package/.claude/agents/references/proposal-reviewer/report-template.md +144 -0
  14. package/.claude/agents/references/proposal-reviewer/sa-prompts.md +149 -0
  15. package/.claude/agents/references/referee-config.md +114 -0
  16. package/.claude/agents/references/referee2-reviewer/audit-checklists.md +287 -0
  17. package/.claude/agents/references/referee2-reviewer/report-template.md +334 -0
  18. package/.claude/rules/design-before-results.md +52 -0
  19. package/.claude/rules/ignore-agents-md.md +17 -0
  20. package/.claude/rules/ignore-gemini-md.md +17 -0
  21. package/.claude/rules/lean-claude-md.md +45 -0
  22. package/.claude/rules/learn-tags.md +99 -0
  23. package/.claude/rules/overleaf-separation.md +67 -0
  24. package/.claude/rules/plan-first.md +175 -0
  25. package/.claude/rules/read-docs-first.md +50 -0
  26. package/.claude/rules/scope-discipline.md +28 -0
  27. package/.claude/settings.json +125 -0
  28. package/.context/current-focus.md +33 -0
  29. package/.context/preferences/priorities.md +36 -0
  30. package/.context/preferences/task-naming.md +28 -0
  31. package/.context/profile.md +29 -0
  32. package/.context/projects/_index.md +41 -0
  33. package/.context/projects/papers/nudge-exp.md +22 -0
  34. package/.context/projects/papers/uncertainty.md +31 -0
  35. package/.context/resources/claude-scientific-writer-review.md +48 -0
  36. package/.context/resources/cunningham-multi-analyst-agents.md +104 -0
  37. package/.context/resources/cunningham-multilang-code-audit.md +62 -0
  38. package/.context/resources/google-ai-co-scientist-review.md +72 -0
  39. package/.context/resources/karpathy-llm-council-review.md +58 -0
  40. package/.context/resources/multi-coder-reliability-protocol.md +175 -0
  41. package/.context/resources/pedro-santanna-takeaways.md +96 -0
  42. package/.context/resources/venue-rankings/abs_ajg_2024.csv +1823 -0
  43. package/.context/resources/venue-rankings/abs_ajg_2024_econ.csv +356 -0
  44. package/.context/resources/venue-rankings/cabs_4_4star_theory.csv +40 -0
  45. package/.context/resources/venue-rankings/core_2026.csv +801 -0
  46. package/.context/resources/venue-rankings.md +147 -0
  47. package/.context/workflows/README.md +69 -0
  48. package/.context/workflows/daily-review.md +91 -0
  49. package/.context/workflows/meeting-actions.md +108 -0
  50. package/.context/workflows/replication-protocol.md +155 -0
  51. package/.context/workflows/weekly-review.md +113 -0
  52. package/.mcp-server-biblio/formatters.py +158 -0
  53. package/.mcp-server-biblio/pyproject.toml +11 -0
  54. package/.mcp-server-biblio/server.py +678 -0
  55. package/.mcp-server-biblio/sources/__init__.py +14 -0
  56. package/.mcp-server-biblio/sources/base.py +73 -0
  57. package/.mcp-server-biblio/sources/formatters.py +83 -0
  58. package/.mcp-server-biblio/sources/models.py +22 -0
  59. package/.mcp-server-biblio/sources/multi_source.py +243 -0
  60. package/.mcp-server-biblio/sources/openalex_source.py +183 -0
  61. package/.mcp-server-biblio/sources/scopus_source.py +309 -0
  62. package/.mcp-server-biblio/sources/wos_source.py +508 -0
  63. package/.mcp-server-biblio/uv.lock +896 -0
  64. package/.scripts/README.md +161 -0
  65. package/.scripts/ai_pattern_density.py +446 -0
  66. package/.scripts/conf +445 -0
  67. package/.scripts/config.py +122 -0
  68. package/.scripts/count_inventory.py +275 -0
  69. package/.scripts/daily_digest.py +288 -0
  70. package/.scripts/done +177 -0
  71. package/.scripts/extract_meeting_actions.py +223 -0
  72. package/.scripts/focus +176 -0
  73. package/.scripts/generate-codex-agents-md.py +217 -0
  74. package/.scripts/inbox +194 -0
  75. package/.scripts/notion_helpers.py +325 -0
  76. package/.scripts/openalex/query_helpers.py +306 -0
  77. package/.scripts/papers +227 -0
  78. package/.scripts/query +223 -0
  79. package/.scripts/session-history.py +201 -0
  80. package/.scripts/skill-health.py +516 -0
  81. package/.scripts/skill-log-miner.py +273 -0
  82. package/.scripts/sync-to-codex.sh +252 -0
  83. package/.scripts/task +213 -0
  84. package/.scripts/tasks +190 -0
  85. package/.scripts/week +206 -0
  86. package/CLAUDE.md +197 -0
  87. package/LICENSE +21 -0
  88. package/MEMORY.md +38 -0
  89. package/README.md +269 -0
  90. package/docs/agents.md +44 -0
  91. package/docs/bibliography-setup.md +55 -0
  92. package/docs/council-mode.md +36 -0
  93. package/docs/getting-started.md +245 -0
  94. package/docs/hooks.md +38 -0
  95. package/docs/mcp-servers.md +82 -0
  96. package/docs/notion-setup.md +109 -0
  97. package/docs/rules.md +33 -0
  98. package/docs/scripts.md +303 -0
  99. package/docs/setup-overview/setup-overview.pdf +0 -0
  100. package/docs/skills.md +70 -0
  101. package/docs/system.md +159 -0
  102. package/hooks/block-destructive-git.sh +66 -0
  103. package/hooks/context-monitor.py +114 -0
  104. package/hooks/postcompact-restore.py +157 -0
  105. package/hooks/precompact-autosave.py +181 -0
  106. package/hooks/promise-checker.sh +124 -0
  107. package/hooks/protect-source-files.sh +81 -0
  108. package/hooks/resume-context-loader.sh +53 -0
  109. package/hooks/startup-context-loader.sh +102 -0
  110. package/package.json +51 -0
  111. package/packages/cli-council/.github/workflows/claude-code-review.yml +44 -0
  112. package/packages/cli-council/.github/workflows/claude.yml +50 -0
  113. package/packages/cli-council/README.md +100 -0
  114. package/packages/cli-council/pyproject.toml +43 -0
  115. package/packages/cli-council/src/cli_council/__init__.py +19 -0
  116. package/packages/cli-council/src/cli_council/__main__.py +185 -0
  117. package/packages/cli-council/src/cli_council/backends/__init__.py +8 -0
  118. package/packages/cli-council/src/cli_council/backends/base.py +81 -0
  119. package/packages/cli-council/src/cli_council/backends/claude.py +25 -0
  120. package/packages/cli-council/src/cli_council/backends/codex.py +27 -0
  121. package/packages/cli-council/src/cli_council/backends/gemini.py +26 -0
  122. package/packages/cli-council/src/cli_council/checkpoint.py +212 -0
  123. package/packages/cli-council/src/cli_council/config.py +51 -0
  124. package/packages/cli-council/src/cli_council/council.py +391 -0
  125. package/packages/cli-council/src/cli_council/models.py +46 -0
  126. package/packages/llm-council/.github/workflows/claude-code-review.yml +44 -0
  127. package/packages/llm-council/.github/workflows/claude.yml +50 -0
  128. package/packages/llm-council/README.md +453 -0
  129. package/packages/llm-council/pyproject.toml +42 -0
  130. package/packages/llm-council/src/llm_council/__init__.py +23 -0
  131. package/packages/llm-council/src/llm_council/__main__.py +259 -0
  132. package/packages/llm-council/src/llm_council/checkpoint.py +193 -0
  133. package/packages/llm-council/src/llm_council/client.py +253 -0
  134. package/packages/llm-council/src/llm_council/config.py +232 -0
  135. package/packages/llm-council/src/llm_council/council.py +482 -0
  136. package/packages/llm-council/src/llm_council/models.py +46 -0
  137. package/packages/mcp-bibliography/MEMORY.md +31 -0
  138. package/packages/mcp-bibliography/_app.py +226 -0
  139. package/packages/mcp-bibliography/formatters.py +158 -0
  140. package/packages/mcp-bibliography/log/2026-03-13-2100.md +35 -0
  141. package/packages/mcp-bibliography/pyproject.toml +15 -0
  142. package/packages/mcp-bibliography/run.sh +20 -0
  143. package/packages/mcp-bibliography/scholarly_formatters.py +83 -0
  144. package/packages/mcp-bibliography/server.py +1857 -0
  145. package/packages/mcp-bibliography/tools/__init__.py +28 -0
  146. package/packages/mcp-bibliography/tools/_registry.py +19 -0
  147. package/packages/mcp-bibliography/tools/altmetric.py +107 -0
  148. package/packages/mcp-bibliography/tools/core.py +92 -0
  149. package/packages/mcp-bibliography/tools/dblp.py +52 -0
  150. package/packages/mcp-bibliography/tools/openalex.py +296 -0
  151. package/packages/mcp-bibliography/tools/opencitations.py +102 -0
  152. package/packages/mcp-bibliography/tools/openreview.py +179 -0
  153. package/packages/mcp-bibliography/tools/orcid.py +131 -0
  154. package/packages/mcp-bibliography/tools/scholarly.py +575 -0
  155. package/packages/mcp-bibliography/tools/unpaywall.py +63 -0
  156. package/packages/mcp-bibliography/tools/zenodo.py +123 -0
  157. package/packages/mcp-bibliography/uv.lock +711 -0
  158. package/scripts/setup.sh +143 -0
  159. package/skills/beamer-deck/SKILL.md +199 -0
  160. package/skills/beamer-deck/references/quality-rubric.md +54 -0
  161. package/skills/beamer-deck/references/review-prompts.md +106 -0
  162. package/skills/bib-validate/SKILL.md +261 -0
  163. package/skills/bib-validate/references/council-mode.md +34 -0
  164. package/skills/bib-validate/references/deep-verify.md +79 -0
  165. package/skills/bib-validate/references/fix-mode.md +36 -0
  166. package/skills/bib-validate/references/openalex-verification.md +45 -0
  167. package/skills/bib-validate/references/preprint-check.md +31 -0
  168. package/skills/bib-validate/references/ref-manager-crossref.md +41 -0
  169. package/skills/bib-validate/references/report-template.md +82 -0
  170. package/skills/code-archaeology/SKILL.md +141 -0
  171. package/skills/code-review/SKILL.md +265 -0
  172. package/skills/code-review/references/quality-rubric.md +67 -0
  173. package/skills/consolidate-memory/SKILL.md +208 -0
  174. package/skills/context-status/SKILL.md +126 -0
  175. package/skills/creation-guard/SKILL.md +230 -0
  176. package/skills/devils-advocate/SKILL.md +130 -0
  177. package/skills/devils-advocate/references/competing-hypotheses.md +83 -0
  178. package/skills/init-project/SKILL.md +115 -0
  179. package/skills/init-project-course/references/memory-and-settings.md +92 -0
  180. package/skills/init-project-course/references/organise-templates.md +94 -0
  181. package/skills/init-project-course/skill.md +147 -0
  182. package/skills/init-project-light/skill.md +139 -0
  183. package/skills/init-project-research/SKILL.md +368 -0
  184. package/skills/init-project-research/references/atlas-pipeline-sync.md +70 -0
  185. package/skills/init-project-research/references/atlas-schema.md +81 -0
  186. package/skills/init-project-research/references/confirmation-report.md +39 -0
  187. package/skills/init-project-research/references/domain-profile-template.md +104 -0
  188. package/skills/init-project-research/references/interview-round3.md +34 -0
  189. package/skills/init-project-research/references/literature-discovery.md +43 -0
  190. package/skills/init-project-research/references/scaffold-details.md +197 -0
  191. package/skills/init-project-research/templates/field-calibration.md +60 -0
  192. package/skills/init-project-research/templates/pipeline-manifest.md +63 -0
  193. package/skills/init-project-research/templates/run-all.sh +116 -0
  194. package/skills/init-project-research/templates/seed-files.md +337 -0
  195. package/skills/insights-deck/SKILL.md +151 -0
  196. package/skills/interview-me/SKILL.md +157 -0
  197. package/skills/latex/SKILL.md +141 -0
  198. package/skills/latex/references/latex-configs.md +183 -0
  199. package/skills/latex-autofix/SKILL.md +230 -0
  200. package/skills/latex-autofix/references/known-errors.md +183 -0
  201. package/skills/latex-autofix/references/quality-rubric.md +50 -0
  202. package/skills/latex-health-check/SKILL.md +161 -0
  203. package/skills/learn/SKILL.md +220 -0
  204. package/skills/learn/scripts/validate_skill.py +265 -0
  205. package/skills/lessons-learned/SKILL.md +201 -0
  206. package/skills/literature/SKILL.md +335 -0
  207. package/skills/literature/references/agent-templates.md +393 -0
  208. package/skills/literature/references/bibliometric-apis.md +44 -0
  209. package/skills/literature/references/cli-council-search.md +79 -0
  210. package/skills/literature/references/openalex-api-guide.md +371 -0
  211. package/skills/literature/references/openalex-common-queries.md +381 -0
  212. package/skills/literature/references/openalex-workflows.md +248 -0
  213. package/skills/literature/references/reference-manager-sync.md +36 -0
  214. package/skills/literature/references/scopus-api-guide.md +208 -0
  215. package/skills/literature/references/wos-api-guide.md +308 -0
  216. package/skills/multi-perspective/SKILL.md +311 -0
  217. package/skills/multi-perspective/references/computational-many-analysts.md +77 -0
  218. package/skills/pipeline-manifest/SKILL.md +226 -0
  219. package/skills/pre-submission-report/SKILL.md +153 -0
  220. package/skills/process-reviews/SKILL.md +244 -0
  221. package/skills/process-reviews/references/rr-routing.md +101 -0
  222. package/skills/project-deck/SKILL.md +87 -0
  223. package/skills/project-safety/SKILL.md +135 -0
  224. package/skills/proofread/SKILL.md +254 -0
  225. package/skills/proofread/references/quality-rubric.md +104 -0
  226. package/skills/python-env/SKILL.md +57 -0
  227. package/skills/quarto-deck/SKILL.md +226 -0
  228. package/skills/quarto-deck/references/markdown-format.md +143 -0
  229. package/skills/quarto-deck/references/quality-rubric.md +54 -0
  230. package/skills/save-context/SKILL.md +174 -0
  231. package/skills/session-log/SKILL.md +98 -0
  232. package/skills/shared/concept-validation-gate.md +161 -0
  233. package/skills/shared/council-protocol.md +265 -0
  234. package/skills/shared/distribution-diagnostics.md +164 -0
  235. package/skills/shared/engagement-stratified-sampling.md +218 -0
  236. package/skills/shared/escalation-protocol.md +74 -0
  237. package/skills/shared/external-audit-protocol.md +205 -0
  238. package/skills/shared/intercoder-reliability.md +256 -0
  239. package/skills/shared/mcp-degradation.md +81 -0
  240. package/skills/shared/method-probing-questions.md +163 -0
  241. package/skills/shared/multi-language-conventions.md +143 -0
  242. package/skills/shared/paid-api-safety.md +174 -0
  243. package/skills/shared/palettes.md +90 -0
  244. package/skills/shared/progressive-disclosure.md +92 -0
  245. package/skills/shared/project-documentation-content.md +443 -0
  246. package/skills/shared/project-documentation-format.md +281 -0
  247. package/skills/shared/project-documentation.md +100 -0
  248. package/skills/shared/publication-output.md +138 -0
  249. package/skills/shared/quality-scoring.md +70 -0
  250. package/skills/shared/reference-resolution.md +77 -0
  251. package/skills/shared/research-quality-rubric.md +165 -0
  252. package/skills/shared/rhetoric-principles.md +54 -0
  253. package/skills/shared/skill-design-patterns.md +272 -0
  254. package/skills/shared/skill-index.md +240 -0
  255. package/skills/shared/system-documentation.md +334 -0
  256. package/skills/shared/tikz-rules.md +402 -0
  257. package/skills/shared/validation-tiers.md +121 -0
  258. package/skills/shared/venue-guides/README.md +46 -0
  259. package/skills/shared/venue-guides/cell_press_style.md +483 -0
  260. package/skills/shared/venue-guides/conferences_formatting.md +564 -0
  261. package/skills/shared/venue-guides/cs_conference_style.md +463 -0
  262. package/skills/shared/venue-guides/examples/cell_summary_example.md +247 -0
  263. package/skills/shared/venue-guides/examples/medical_structured_abstract.md +313 -0
  264. package/skills/shared/venue-guides/examples/nature_abstract_examples.md +213 -0
  265. package/skills/shared/venue-guides/examples/neurips_introduction_example.md +245 -0
  266. package/skills/shared/venue-guides/journals_formatting.md +486 -0
  267. package/skills/shared/venue-guides/medical_journal_styles.md +535 -0
  268. package/skills/shared/venue-guides/ml_conference_style.md +556 -0
  269. package/skills/shared/venue-guides/nature_science_style.md +405 -0
  270. package/skills/shared/venue-guides/reviewer_expectations.md +417 -0
  271. package/skills/shared/venue-guides/venue_writing_styles.md +321 -0
  272. package/skills/split-pdf/SKILL.md +172 -0
  273. package/skills/split-pdf/methodology.md +48 -0
  274. package/skills/sync-notion/SKILL.md +93 -0
  275. package/skills/system-audit/SKILL.md +157 -0
  276. package/skills/system-audit/references/sub-agent-prompts.md +294 -0
  277. package/skills/task-management/SKILL.md +131 -0
  278. package/skills/update-focus/SKILL.md +204 -0
  279. package/skills/update-project-doc/SKILL.md +194 -0
  280. package/skills/validate-bib/SKILL.md +242 -0
  281. package/skills/validate-bib/references/council-mode.md +34 -0
  282. package/skills/validate-bib/references/deep-verify.md +71 -0
  283. package/skills/validate-bib/references/openalex-verification.md +45 -0
  284. package/skills/validate-bib/references/preprint-check.md +31 -0
  285. package/skills/validate-bib/references/report-template.md +62 -0
@@ -0,0 +1,482 @@
1
+ """3-stage LLM Council orchestration.
2
+
3
+ Adapted from karpathy/llm-council:
4
+ Stage 1 -- Individual assessments (parallel, structured JSON or text)
5
+ Stage 2 -- Peer review (parallel, free-form text with FINAL RANKING)
6
+ Stage 3 -- Chairman synthesis (single model, structured JSON or text)
7
+
8
+ Supports checkpoint-based session resumption (inspired by Owlex) and
9
+ atomic file-based state (inspired by agents-council).
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import asyncio
15
+ import json
16
+ import logging
17
+ import re
18
+ from collections import defaultdict
19
+ from pathlib import Path
20
+ from time import perf_counter
21
+
22
+ from llm_council.checkpoint import CouncilCheckpointer
23
+ from llm_council.client import LLMClient
24
+ from llm_council.config import AVAILABLE_MODELS, model_display_name
25
+ from llm_council.models import (
26
+ CouncilAssessment,
27
+ CouncilMeta,
28
+ CouncilPeerReview,
29
+ CouncilResult,
30
+ )
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
class CouncilService:
    """Orchestrate a three-stage, multi-model council review.

    Stage 1 collects one JSON assessment per council model, Stage 2 has the
    models peer-review the anonymised assessments and rank them, and Stage 3
    asks a chairman model to synthesise a single final answer.
    """

    def __init__(self, llm: LLMClient, *, max_tokens: int | None = None) -> None:
        """Store the LLM client and the optional ``max_tokens`` passed to every call."""
        self.llm = llm
        self._max_tokens = max_tokens

    async def run_council(
        self,
        system_prompt: str,
        user_msg: str,
        council_models: list[str],
        chairman_model: str,
        *,
        existing_result: dict | None = None,
        existing_model: str | None = None,
        stage2_system: str | None = None,
        stage3_prompt_builder: object | None = None,
        checkpoint_dir: str | Path | None = None,
        resume: bool = False,
    ) -> CouncilResult:
        """Run the full 3-stage council process.

        Parameters
        ----------
        system_prompt:
            The system prompt for Stage 1 assessments.
        user_msg:
            The user message for Stage 1 assessments.
        council_models:
            List of OpenRouter model IDs to query (3+ models).
        chairman_model:
            Model to use for the final synthesis.
        existing_result:
            If provided, reuse this as one of the Stage 1 assessments.
        existing_model:
            The model ID that produced ``existing_result``.
        stage2_system:
            Optional custom system prompt for Stage 2 peer review.
            Defaults to a generic meta-reviewer prompt.
        stage3_prompt_builder:
            Optional callable(assessments, peer_reviews, user_msg) -> str
            that builds a custom Stage 3 chairman prompt. If None, uses
            the default synthesis prompt.
        checkpoint_dir:
            Directory for checkpoint files. If provided, each stage's
            results are saved for crash recovery and resumption.
        resume:
            If True and checkpoint_dir is provided, resume from the last
            completed stage of the most recent run.

        Returns
        -------
        CouncilResult
            The synthesised result, all assessments and peer reviews, and
            timing/ranking metadata. If no Stage 1 assessment could be
            obtained, a result with an empty ``final_result`` is returned
            and Stages 2 and 3 are skipped.
        """
        t_total = perf_counter()
        ckpt, resume_from = self._open_checkpoint(checkpoint_dir, resume)

        # ---- Stage 1: load from checkpoint if possible, else query ----
        assessments: list[CouncilAssessment] = []
        stage1_ms = 0
        stage1_fresh = True
        if ckpt and resume_from >= 1:
            saved = ckpt.load_stage1()
            if saved:
                assessments = [CouncilAssessment(**a) for a in saved]
                stage1_fresh = False
                logger.info(
                    "Stage 1: loaded %d assessments from checkpoint",
                    len(assessments),
                )
        if stage1_fresh:
            t1 = perf_counter()
            assessments = await self._stage1_collect(
                system_prompt, user_msg, council_models,
                existing_result=existing_result,
                existing_model=existing_model,
            )
            stage1_ms = int((perf_counter() - t1) * 1000)

        if not assessments:
            return CouncilResult(
                final_result={},
                assessments=[],
                peer_reviews=[],
                meta=CouncilMeta(
                    council_models=council_models,
                    chairman_model=chairman_model,
                    stage1_ms=stage1_ms,
                    total_ms=int((perf_counter() - t_total) * 1000),
                    reused_model=existing_model,
                ),
            )

        # Anonymise: "Assessment A", "Assessment B", ...
        for i, a in enumerate(assessments):
            a.label = f"Assessment {chr(65 + i)}"

        # Persist Stage 1 whenever it was computed in this call. Keying on
        # "was it recomputed" rather than on ``resume_from`` also covers the
        # case where a resumed run's Stage 1 checkpoint could not be loaded.
        if ckpt and stage1_fresh:
            ckpt.save_stage1(
                [a.model_dump() for a in assessments],
                [a.model for a in assessments],
            )
            pending = ckpt.pending_participants(
                council_models,
                [a.model for a in assessments],
            )
            if pending:
                logger.warning("Stage 1: pending models: %s", pending)

        # ---- Stage 2: load from checkpoint if possible, else review ----
        label_to_model = {a.label: a.model for a in assessments}
        peer_reviews: list[CouncilPeerReview] = []
        aggregate_rankings: list[dict] = []
        stage2_ms = 0
        stage2_fresh = True
        # A saved Stage 2 refers to the labels of the saved Stage 1; if
        # Stage 1 had to be recomputed, the old reviews no longer apply.
        if ckpt and resume_from >= 2 and not stage1_fresh:
            saved = ckpt.load_stage2()
            if saved:
                reviews_data, aggregate_rankings = saved
                peer_reviews = [CouncilPeerReview(**r) for r in reviews_data]
                stage2_fresh = False
                logger.info(
                    "Stage 2: loaded %d reviews from checkpoint",
                    len(peer_reviews),
                )
        if stage2_fresh:
            t2 = perf_counter()
            peer_reviews, label_to_model = await self._stage2_peer_review(
                system_prompt, user_msg, assessments, council_models,
                custom_system=stage2_system,
            )
            stage2_ms = int((perf_counter() - t2) * 1000)
            aggregate_rankings = self._calculate_aggregate_rankings(
                peer_reviews, label_to_model,
            )

        if ckpt and stage2_fresh:
            ckpt.save_stage2(
                [r.model_dump() for r in peer_reviews],
                [r.model for r in peer_reviews],
                aggregate_rankings=aggregate_rankings,
            )

        # ---- Stage 3: chairman synthesis (always executed) ----
        t3 = perf_counter()
        final_result, stage3_fallback = await self._stage3_synthesise(
            system_prompt, user_msg, assessments, peer_reviews,
            chairman_model,
            custom_prompt_builder=stage3_prompt_builder,
        )
        stage3_ms = int((perf_counter() - t3) * 1000)

        if ckpt:
            ckpt.save_stage3(final_result, chairman_model)

        total_ms = int((perf_counter() - t_total) * 1000)

        return CouncilResult(
            final_result=final_result,
            assessments=assessments,
            peer_reviews=peer_reviews,
            meta=CouncilMeta(
                council_models=council_models,
                chairman_model=chairman_model,
                stage1_ms=stage1_ms,
                stage2_ms=stage2_ms,
                stage3_ms=stage3_ms,
                total_ms=total_ms,
                reused_model=existing_model,
                aggregate_rankings=aggregate_rankings,
                stage3_fallback=stage3_fallback,
            ),
        )

    @staticmethod
    def _open_checkpoint(
        checkpoint_dir: str | Path | None,
        resume: bool,
    ) -> tuple[CouncilCheckpointer | None, int]:
        """Return ``(checkpointer, resume_from)`` for this run.

        ``resume_from`` is the number of the last completed stage of the run
        being resumed, or 0 when a new run is started. Without a
        ``checkpoint_dir`` no checkpointer is created.
        """
        if not checkpoint_dir:
            return None, 0

        checkpoint_path = Path(checkpoint_dir)
        if resume:
            latest_run = CouncilCheckpointer(checkpoint_path).find_latest_run()
            if latest_run:
                ckpt = CouncilCheckpointer(checkpoint_path, run_id=latest_run)
                resume_from = ckpt.last_completed_stage()
                if resume_from > 0:
                    logger.info(
                        "Resuming run %s from stage %d",
                        latest_run, resume_from + 1,
                    )
                    return ckpt, resume_from
        # Nothing (useful) to resume: start a new run.
        return CouncilCheckpointer(checkpoint_path), 0

    # ------------------------------------------------------------------
    # Stage 1
    # ------------------------------------------------------------------

    async def _stage1_collect(
        self,
        system_prompt: str,
        user_msg: str,
        council_models: list[str],
        *,
        existing_result: dict | None = None,
        existing_model: str | None = None,
    ) -> list[CouncilAssessment]:
        """Collect one assessment per council model, querying them concurrently.

        A non-empty ``existing_result`` with its ``existing_model`` is reused
        as the first assessment and that model is not queried again. Models
        whose call raises are logged and left out of the returned list.
        """
        assessments: list[CouncilAssessment] = []

        # One flag drives both "reuse" and "skip querying" so the two can
        # never disagree (an empty existing_result means: query the model).
        reuse_existing = bool(existing_result and existing_model)
        if reuse_existing:
            assessments.append(CouncilAssessment(
                model=existing_model,
                model_name=model_display_name(existing_model),
                result_json=existing_result,
            ))

        models_to_query = [
            m for m in council_models
            if not (reuse_existing and m == existing_model)
        ]
        if not models_to_query:
            return assessments

        async def _query_one(model_id: str) -> CouncilAssessment | None:
            try:
                result = await self.llm.chat_json(
                    system_prompt, user_msg,
                    model=model_id, max_tokens=self._max_tokens,
                )
                return CouncilAssessment(
                    model=model_id,
                    model_name=model_display_name(model_id),
                    result_json=result,
                )
            except Exception:
                # Best effort: one failing model must not sink the council.
                logger.exception("Council Stage 1: model %s failed", model_id)
                return None

        results = await asyncio.gather(*(_query_one(m) for m in models_to_query))
        assessments.extend(r for r in results if r is not None)

        logger.info(
            "Council Stage 1: %d/%d models responded",
            len(assessments), len(council_models),
        )
        return assessments

    # ------------------------------------------------------------------
    # Stage 2
    # ------------------------------------------------------------------

    async def _stage2_peer_review(
        self,
        system_prompt: str,
        user_msg: str,
        assessments: list[CouncilAssessment],
        council_models: list[str],
        *,
        custom_system: str | None = None,
    ) -> tuple[list[CouncilPeerReview], dict[str, str]]:
        """Have the council models review and rank the anonymised assessments.

        Only council models that produced an assessment act as reviewers; if
        none did, every council model is asked. Returns the successful
        reviews and the label -> model ID mapping. ``system_prompt`` is not
        used by this stage.
        """
        assessments_text = "\n\n---\n\n".join(
            f"**{a.label}:**\n```json\n{json.dumps(a.result_json, indent=2)}\n```"
            for a in assessments
        )

        label_to_model = {a.label: a.model for a in assessments}

        # Only the first 3000 characters of the original message are quoted.
        review_prompt = f"""You are reviewing multiple assessments of the same question/task.

The original question/context given to all assessors:
{user_msg[:3000]}

Here are the anonymised assessments:

{assessments_text}

Your task:
1. Evaluate each assessment individually. What does it do well? What does it miss or get wrong?
2. Identify specific areas of AGREEMENT across assessments.
3. Identify specific areas of DISAGREEMENT and explain which position you find more convincing and why.
4. Provide a final ranking from best to worst.

IMPORTANT: Your final ranking MUST be formatted EXACTLY as follows:
- Start with the line "FINAL RANKING:" (all caps, with colon)
- Then list the assessments from best to worst as a numbered list
- Each line should be: number, period, space, then ONLY the assessment label (e.g., "1. Assessment A")

Now provide your evaluation and ranking:"""

        review_system = custom_system or (
            "You are an expert meta-reviewer. You evaluate and compare "
            "multiple independent assessments, identifying strengths, "
            "weaknesses, agreements, and disagreements."
        )

        # Build the membership set once instead of once per council model.
        assessed_models = {a.model for a in assessments}
        models_to_review = [m for m in council_models if m in assessed_models]
        if not models_to_review:
            models_to_review = council_models

        async def _review_one(model_id: str) -> CouncilPeerReview | None:
            try:
                text = await self.llm.chat_text(
                    review_system, review_prompt,
                    model=model_id, max_tokens=self._max_tokens,
                )
                return CouncilPeerReview(
                    model=model_id,
                    model_name=model_display_name(model_id),
                    review_text=text,
                    parsed_ranking=self._parse_ranking_from_text(text),
                )
            except Exception:
                # Best effort: a failing reviewer is logged and skipped.
                logger.exception("Council Stage 2: model %s failed", model_id)
                return None

        results = await asyncio.gather(*(_review_one(m) for m in models_to_review))

        reviews = [r for r in results if r is not None]
        logger.info(
            "Council Stage 2: %d/%d models reviewed",
            len(reviews), len(models_to_review),
        )
        return reviews, label_to_model

    # ------------------------------------------------------------------
    # Stage 3
    # ------------------------------------------------------------------

    async def _stage3_synthesise(
        self,
        system_prompt: str,
        user_msg: str,
        assessments: list[CouncilAssessment],
        peer_reviews: list[CouncilPeerReview],
        chairman_model: str,
        *,
        custom_prompt_builder: object | None = None,
    ) -> tuple[dict, bool]:
        """Ask the chairman model for the final synthesis.

        Returns ``(result, fallback)``. ``fallback`` is True when the
        chairman call raised; in that case the first assessment's JSON (or
        ``{}`` when there are no assessments) is returned instead.
        """
        if custom_prompt_builder and callable(custom_prompt_builder):
            chairman_prompt = custom_prompt_builder(assessments, peer_reviews, user_msg)
        else:
            assessments_text = "\n\n".join(
                f"**{a.label}** (by {a.model_name}):\n"
                f"```json\n{json.dumps(a.result_json, indent=2)}\n```"
                for a in assessments
            )

            reviews_text = "\n\n".join(
                f"**Review by {r.model_name}:**\n{r.review_text}"
                for r in peer_reviews
            )

            chairman_prompt = f"""You are the Chairman of an LLM Council. Multiple AI models have independently assessed the same question, and then peer-reviewed each other's assessments.

ORIGINAL CONTEXT:
{user_msg[:3000]}

STAGE 1 -- Individual Assessments:
{assessments_text}

STAGE 2 -- Peer Reviews:
{reviews_text}

Your task as Chairman:
1. Consider all individual assessments and their insights
2. Consider the peer reviews and what they reveal about quality and disagreements
3. Identify areas of strong consensus vs. genuine disagreement
4. Synthesise a SINGLE, comprehensive answer that represents the council's collective wisdom

Where the council agrees, reflect that consensus. Where they disagree, use your judgment to select the most well-reasoned position and explain why.

You MUST respond with valid JSON matching the EXACT SAME SCHEMA as the individual assessments above. Respond ONLY with valid JSON."""

        try:
            result = await self.llm.chat_json(
                system_prompt, chairman_prompt,
                model=chairman_model, max_tokens=self._max_tokens,
            )
        except Exception:
            logger.exception("Council Stage 3: chairman %s failed", chairman_model)
            if assessments:
                return assessments[0].result_json, True
            return {}, True
        return result, False

    # ------------------------------------------------------------------
    # Ranking utilities
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_ranking_from_text(text: str) -> list[str]:
        """Extract the ordered assessment labels from a peer-review text.

        If the text contains ``FINAL RANKING:``, only the part after the
        last occurrence is inspected (reviewers sometimes echo the format
        instructions before giving their actual ranking): numbered entries
        such as ``1. Assessment A`` are preferred, otherwise any label found
        in that part is used. Without the marker, labels are taken from the
        whole text in order of first mention.

        Each label is returned at most once, at its first position, so a
        repeated mention cannot give one assessment several ranks.
        """
        _, marker, ranking_section = text.rpartition("FINAL RANKING:")
        if marker:
            labels = re.findall(r"\d+\.\s*(Assessment [A-Z])", ranking_section)
            if not labels:
                labels = re.findall(r"Assessment [A-Z]", ranking_section)
        else:
            labels = re.findall(r"Assessment [A-Z]", text)

        # dict.fromkeys de-duplicates while preserving first-seen order.
        return list(dict.fromkeys(labels))

    @staticmethod
    def _calculate_aggregate_rankings(
        peer_reviews: list[CouncilPeerReview],
        label_to_model: dict[str, str],
    ) -> list[dict]:
        """Average each assessment's rank position across all peer reviews.

        Labels that are not keys of ``label_to_model`` are ignored. Returns
        one dict per ranked label (``label``, ``model``, ``model_name``,
        ``average_rank``, ``rankings_count``), sorted by ascending average
        rank; assessments nobody ranked are omitted.
        """
        positions: dict[str, list[int]] = defaultdict(list)
        for review in peer_reviews:
            for pos, label in enumerate(review.parsed_ranking, start=1):
                if label in label_to_model:
                    positions[label].append(pos)

        aggregate = [
            {
                "label": label,
                "model": label_to_model[label],
                "model_name": model_display_name(label_to_model[label]),
                "average_rank": round(sum(pos_list) / len(pos_list), 2),
                "rankings_count": len(pos_list),
            }
            for label, pos_list in positions.items()
        ]
        aggregate.sort(key=lambda entry: entry["average_rank"])
        return aggregate
@@ -0,0 +1,46 @@
1
+ """Pydantic models for council deliberation results."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pydantic import BaseModel, Field
6
+
7
+
8
class CouncilAssessment(BaseModel):
    """Stage 1 output: the individual assessment from one council member."""

    # OpenRouter model ID.
    model: str
    # Human-readable name.
    model_name: str
    # Raw JSON response; its schema depends on the consumer.
    result_json: dict
    # Anonymised label, e.g. "Assessment A"; defaults to an empty string.
    label: str = Field(default="")
15
+
16
+
17
class CouncilPeerReview(BaseModel):
    """Stage 2 output: one council member's peer review of all assessments."""

    # Model ID and display name, as in CouncilAssessment (presumably the
    # reviewing member's -- confirm against the caller).
    model: str
    model_name: str
    # Free-form evaluation plus disagreement analysis.
    review_text: str
    # Ranking labels parsed from the review; a fresh empty list by default.
    parsed_ranking: list[str] = Field(default_factory=list)
24
+
25
+
26
class CouncilMeta(BaseModel):
    """Metadata describing a single council run."""

    # Model IDs of the council members and of the chairman.
    council_models: list[str]
    chairman_model: str

    # Per-stage and total timings; the "_ms" suffix suggests milliseconds
    # (NOTE(review): confirm the unit where these are populated).
    stage1_ms: int = Field(default=0)
    stage2_ms: int = Field(default=0)
    stage3_ms: int = Field(default=0)
    total_ms: int = Field(default=0)

    # Optional model ID; None by default.
    # NOTE(review): exact meaning of "reused" is not visible here -- confirm.
    reused_model: str | None = Field(default=None)

    # Aggregate ranking entries (plain dicts); empty list by default.
    aggregate_rankings: list[dict] = Field(default_factory=list)

    # Presumably True when Stage 3 fell back instead of using the chairman's
    # synthesis -- verify against the code that sets it.
    stage3_fallback: bool = Field(default=False)
38
+
39
+
40
class CouncilResult(BaseModel):
    """Complete result of a council deliberation."""

    # Synthesised response; its schema depends on the consumer.
    final_result: dict
    # Individual assessments (Stage 1).
    assessments: list[CouncilAssessment]
    # Peer reviews of those assessments (Stage 2).
    peer_reviews: list[CouncilPeerReview]
    # Metadata for the run.
    meta: CouncilMeta
@@ -0,0 +1,31 @@
1
+ # Bibliography MCP — Known Issues & Learnings
2
+
3
+ ## Known Issues
4
+
5
+ ### S2 `scholarly_paper_detail` returns wrong journal names
6
+
7
+ Semantic Scholar maps journal abbreviations incorrectly for some management journals:
8
+
9
+ | S2 returns | Actual journal |
10
+ |-----------|---------------|
11
+ | Southern Medical Journal | Strategic Management Journal |
12
+ | Quality Engineering | Journal of Management Studies |
13
+
14
+ **Impact:** The BibTeX generated from the `citationStyles` field and the `venue` metadata both carry the wrong journal name.
15
+ **Mitigation:** Always cross-check journal names against CrossRef DOI lookup or `scholarly_verify_dois` results.
16
+
17
+ ### `scholarly_search` unreliable for specific known papers
18
+
19
+ `scholarly_search` returns noisy, irrelevant results when searching for a specific paper by author + title + year (e.g., queries for management papers return medical/biology results).
20
+
21
+ **Impact:** Cannot reliably find seminal papers in management/org theory.
22
+ **Mitigation:** Use CrossRef API directly for targeted lookups:
23
+ ```bash
24
+ curl -sL "https://api.crossref.org/works?query.bibliographic=URL-encoded+title+author&rows=3"
25
+ ```
26
+ Reserve `scholarly_search` for broad topic discovery only.
27
+
28
+ ## Citations
29
+
30
+ [LEARN:citation] S2 "Southern Medical Journal" → Strategic Management Journal (SMJ abbreviation collision)
31
+ [LEARN:citation] S2 "Quality Engineering" → Journal of Management Studies (JMS abbreviation collision)