flonat-research 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285) hide show
  1. package/.claude/agents/domain-reviewer.md +336 -0
  2. package/.claude/agents/fixer.md +226 -0
  3. package/.claude/agents/paper-critic.md +370 -0
  4. package/.claude/agents/peer-reviewer.md +289 -0
  5. package/.claude/agents/proposal-reviewer.md +215 -0
  6. package/.claude/agents/referee2-reviewer.md +367 -0
  7. package/.claude/agents/references/journal-referee-profiles.md +354 -0
  8. package/.claude/agents/references/paper-critic/council-personas.md +77 -0
  9. package/.claude/agents/references/paper-critic/council-prompts.md +198 -0
  10. package/.claude/agents/references/peer-reviewer/report-template.md +199 -0
  11. package/.claude/agents/references/peer-reviewer/sa-prompts.md +260 -0
  12. package/.claude/agents/references/peer-reviewer/security-scan.md +188 -0
  13. package/.claude/agents/references/proposal-reviewer/report-template.md +144 -0
  14. package/.claude/agents/references/proposal-reviewer/sa-prompts.md +149 -0
  15. package/.claude/agents/references/referee-config.md +114 -0
  16. package/.claude/agents/references/referee2-reviewer/audit-checklists.md +287 -0
  17. package/.claude/agents/references/referee2-reviewer/report-template.md +334 -0
  18. package/.claude/rules/design-before-results.md +52 -0
  19. package/.claude/rules/ignore-agents-md.md +17 -0
  20. package/.claude/rules/ignore-gemini-md.md +17 -0
  21. package/.claude/rules/lean-claude-md.md +45 -0
  22. package/.claude/rules/learn-tags.md +99 -0
  23. package/.claude/rules/overleaf-separation.md +67 -0
  24. package/.claude/rules/plan-first.md +175 -0
  25. package/.claude/rules/read-docs-first.md +50 -0
  26. package/.claude/rules/scope-discipline.md +28 -0
  27. package/.claude/settings.json +125 -0
  28. package/.context/current-focus.md +33 -0
  29. package/.context/preferences/priorities.md +36 -0
  30. package/.context/preferences/task-naming.md +28 -0
  31. package/.context/profile.md +29 -0
  32. package/.context/projects/_index.md +41 -0
  33. package/.context/projects/papers/nudge-exp.md +22 -0
  34. package/.context/projects/papers/uncertainty.md +31 -0
  35. package/.context/resources/claude-scientific-writer-review.md +48 -0
  36. package/.context/resources/cunningham-multi-analyst-agents.md +104 -0
  37. package/.context/resources/cunningham-multilang-code-audit.md +62 -0
  38. package/.context/resources/google-ai-co-scientist-review.md +72 -0
  39. package/.context/resources/karpathy-llm-council-review.md +58 -0
  40. package/.context/resources/multi-coder-reliability-protocol.md +175 -0
  41. package/.context/resources/pedro-santanna-takeaways.md +96 -0
  42. package/.context/resources/venue-rankings/abs_ajg_2024.csv +1823 -0
  43. package/.context/resources/venue-rankings/abs_ajg_2024_econ.csv +356 -0
  44. package/.context/resources/venue-rankings/cabs_4_4star_theory.csv +40 -0
  45. package/.context/resources/venue-rankings/core_2026.csv +801 -0
  46. package/.context/resources/venue-rankings.md +147 -0
  47. package/.context/workflows/README.md +69 -0
  48. package/.context/workflows/daily-review.md +91 -0
  49. package/.context/workflows/meeting-actions.md +108 -0
  50. package/.context/workflows/replication-protocol.md +155 -0
  51. package/.context/workflows/weekly-review.md +113 -0
  52. package/.mcp-server-biblio/formatters.py +158 -0
  53. package/.mcp-server-biblio/pyproject.toml +11 -0
  54. package/.mcp-server-biblio/server.py +678 -0
  55. package/.mcp-server-biblio/sources/__init__.py +14 -0
  56. package/.mcp-server-biblio/sources/base.py +73 -0
  57. package/.mcp-server-biblio/sources/formatters.py +83 -0
  58. package/.mcp-server-biblio/sources/models.py +22 -0
  59. package/.mcp-server-biblio/sources/multi_source.py +243 -0
  60. package/.mcp-server-biblio/sources/openalex_source.py +183 -0
  61. package/.mcp-server-biblio/sources/scopus_source.py +309 -0
  62. package/.mcp-server-biblio/sources/wos_source.py +508 -0
  63. package/.mcp-server-biblio/uv.lock +896 -0
  64. package/.scripts/README.md +161 -0
  65. package/.scripts/ai_pattern_density.py +446 -0
  66. package/.scripts/conf +445 -0
  67. package/.scripts/config.py +122 -0
  68. package/.scripts/count_inventory.py +275 -0
  69. package/.scripts/daily_digest.py +288 -0
  70. package/.scripts/done +177 -0
  71. package/.scripts/extract_meeting_actions.py +223 -0
  72. package/.scripts/focus +176 -0
  73. package/.scripts/generate-codex-agents-md.py +217 -0
  74. package/.scripts/inbox +194 -0
  75. package/.scripts/notion_helpers.py +325 -0
  76. package/.scripts/openalex/query_helpers.py +306 -0
  77. package/.scripts/papers +227 -0
  78. package/.scripts/query +223 -0
  79. package/.scripts/session-history.py +201 -0
  80. package/.scripts/skill-health.py +516 -0
  81. package/.scripts/skill-log-miner.py +273 -0
  82. package/.scripts/sync-to-codex.sh +252 -0
  83. package/.scripts/task +213 -0
  84. package/.scripts/tasks +190 -0
  85. package/.scripts/week +206 -0
  86. package/CLAUDE.md +197 -0
  87. package/LICENSE +21 -0
  88. package/MEMORY.md +38 -0
  89. package/README.md +269 -0
  90. package/docs/agents.md +44 -0
  91. package/docs/bibliography-setup.md +55 -0
  92. package/docs/council-mode.md +36 -0
  93. package/docs/getting-started.md +245 -0
  94. package/docs/hooks.md +38 -0
  95. package/docs/mcp-servers.md +82 -0
  96. package/docs/notion-setup.md +109 -0
  97. package/docs/rules.md +33 -0
  98. package/docs/scripts.md +303 -0
  99. package/docs/setup-overview/setup-overview.pdf +0 -0
  100. package/docs/skills.md +70 -0
  101. package/docs/system.md +159 -0
  102. package/hooks/block-destructive-git.sh +66 -0
  103. package/hooks/context-monitor.py +114 -0
  104. package/hooks/postcompact-restore.py +157 -0
  105. package/hooks/precompact-autosave.py +181 -0
  106. package/hooks/promise-checker.sh +124 -0
  107. package/hooks/protect-source-files.sh +81 -0
  108. package/hooks/resume-context-loader.sh +53 -0
  109. package/hooks/startup-context-loader.sh +102 -0
  110. package/package.json +51 -0
  111. package/packages/cli-council/.github/workflows/claude-code-review.yml +44 -0
  112. package/packages/cli-council/.github/workflows/claude.yml +50 -0
  113. package/packages/cli-council/README.md +100 -0
  114. package/packages/cli-council/pyproject.toml +43 -0
  115. package/packages/cli-council/src/cli_council/__init__.py +19 -0
  116. package/packages/cli-council/src/cli_council/__main__.py +185 -0
  117. package/packages/cli-council/src/cli_council/backends/__init__.py +8 -0
  118. package/packages/cli-council/src/cli_council/backends/base.py +81 -0
  119. package/packages/cli-council/src/cli_council/backends/claude.py +25 -0
  120. package/packages/cli-council/src/cli_council/backends/codex.py +27 -0
  121. package/packages/cli-council/src/cli_council/backends/gemini.py +26 -0
  122. package/packages/cli-council/src/cli_council/checkpoint.py +212 -0
  123. package/packages/cli-council/src/cli_council/config.py +51 -0
  124. package/packages/cli-council/src/cli_council/council.py +391 -0
  125. package/packages/cli-council/src/cli_council/models.py +46 -0
  126. package/packages/llm-council/.github/workflows/claude-code-review.yml +44 -0
  127. package/packages/llm-council/.github/workflows/claude.yml +50 -0
  128. package/packages/llm-council/README.md +453 -0
  129. package/packages/llm-council/pyproject.toml +42 -0
  130. package/packages/llm-council/src/llm_council/__init__.py +23 -0
  131. package/packages/llm-council/src/llm_council/__main__.py +259 -0
  132. package/packages/llm-council/src/llm_council/checkpoint.py +193 -0
  133. package/packages/llm-council/src/llm_council/client.py +253 -0
  134. package/packages/llm-council/src/llm_council/config.py +232 -0
  135. package/packages/llm-council/src/llm_council/council.py +482 -0
  136. package/packages/llm-council/src/llm_council/models.py +46 -0
  137. package/packages/mcp-bibliography/MEMORY.md +31 -0
  138. package/packages/mcp-bibliography/_app.py +226 -0
  139. package/packages/mcp-bibliography/formatters.py +158 -0
  140. package/packages/mcp-bibliography/log/2026-03-13-2100.md +35 -0
  141. package/packages/mcp-bibliography/pyproject.toml +15 -0
  142. package/packages/mcp-bibliography/run.sh +20 -0
  143. package/packages/mcp-bibliography/scholarly_formatters.py +83 -0
  144. package/packages/mcp-bibliography/server.py +1857 -0
  145. package/packages/mcp-bibliography/tools/__init__.py +28 -0
  146. package/packages/mcp-bibliography/tools/_registry.py +19 -0
  147. package/packages/mcp-bibliography/tools/altmetric.py +107 -0
  148. package/packages/mcp-bibliography/tools/core.py +92 -0
  149. package/packages/mcp-bibliography/tools/dblp.py +52 -0
  150. package/packages/mcp-bibliography/tools/openalex.py +296 -0
  151. package/packages/mcp-bibliography/tools/opencitations.py +102 -0
  152. package/packages/mcp-bibliography/tools/openreview.py +179 -0
  153. package/packages/mcp-bibliography/tools/orcid.py +131 -0
  154. package/packages/mcp-bibliography/tools/scholarly.py +575 -0
  155. package/packages/mcp-bibliography/tools/unpaywall.py +63 -0
  156. package/packages/mcp-bibliography/tools/zenodo.py +123 -0
  157. package/packages/mcp-bibliography/uv.lock +711 -0
  158. package/scripts/setup.sh +143 -0
  159. package/skills/beamer-deck/SKILL.md +199 -0
  160. package/skills/beamer-deck/references/quality-rubric.md +54 -0
  161. package/skills/beamer-deck/references/review-prompts.md +106 -0
  162. package/skills/bib-validate/SKILL.md +261 -0
  163. package/skills/bib-validate/references/council-mode.md +34 -0
  164. package/skills/bib-validate/references/deep-verify.md +79 -0
  165. package/skills/bib-validate/references/fix-mode.md +36 -0
  166. package/skills/bib-validate/references/openalex-verification.md +45 -0
  167. package/skills/bib-validate/references/preprint-check.md +31 -0
  168. package/skills/bib-validate/references/ref-manager-crossref.md +41 -0
  169. package/skills/bib-validate/references/report-template.md +82 -0
  170. package/skills/code-archaeology/SKILL.md +141 -0
  171. package/skills/code-review/SKILL.md +265 -0
  172. package/skills/code-review/references/quality-rubric.md +67 -0
  173. package/skills/consolidate-memory/SKILL.md +208 -0
  174. package/skills/context-status/SKILL.md +126 -0
  175. package/skills/creation-guard/SKILL.md +230 -0
  176. package/skills/devils-advocate/SKILL.md +130 -0
  177. package/skills/devils-advocate/references/competing-hypotheses.md +83 -0
  178. package/skills/init-project/SKILL.md +115 -0
  179. package/skills/init-project-course/references/memory-and-settings.md +92 -0
  180. package/skills/init-project-course/references/organise-templates.md +94 -0
  181. package/skills/init-project-course/skill.md +147 -0
  182. package/skills/init-project-light/skill.md +139 -0
  183. package/skills/init-project-research/SKILL.md +368 -0
  184. package/skills/init-project-research/references/atlas-pipeline-sync.md +70 -0
  185. package/skills/init-project-research/references/atlas-schema.md +81 -0
  186. package/skills/init-project-research/references/confirmation-report.md +39 -0
  187. package/skills/init-project-research/references/domain-profile-template.md +104 -0
  188. package/skills/init-project-research/references/interview-round3.md +34 -0
  189. package/skills/init-project-research/references/literature-discovery.md +43 -0
  190. package/skills/init-project-research/references/scaffold-details.md +197 -0
  191. package/skills/init-project-research/templates/field-calibration.md +60 -0
  192. package/skills/init-project-research/templates/pipeline-manifest.md +63 -0
  193. package/skills/init-project-research/templates/run-all.sh +116 -0
  194. package/skills/init-project-research/templates/seed-files.md +337 -0
  195. package/skills/insights-deck/SKILL.md +151 -0
  196. package/skills/interview-me/SKILL.md +157 -0
  197. package/skills/latex/SKILL.md +141 -0
  198. package/skills/latex/references/latex-configs.md +183 -0
  199. package/skills/latex-autofix/SKILL.md +230 -0
  200. package/skills/latex-autofix/references/known-errors.md +183 -0
  201. package/skills/latex-autofix/references/quality-rubric.md +50 -0
  202. package/skills/latex-health-check/SKILL.md +161 -0
  203. package/skills/learn/SKILL.md +220 -0
  204. package/skills/learn/scripts/validate_skill.py +265 -0
  205. package/skills/lessons-learned/SKILL.md +201 -0
  206. package/skills/literature/SKILL.md +335 -0
  207. package/skills/literature/references/agent-templates.md +393 -0
  208. package/skills/literature/references/bibliometric-apis.md +44 -0
  209. package/skills/literature/references/cli-council-search.md +79 -0
  210. package/skills/literature/references/openalex-api-guide.md +371 -0
  211. package/skills/literature/references/openalex-common-queries.md +381 -0
  212. package/skills/literature/references/openalex-workflows.md +248 -0
  213. package/skills/literature/references/reference-manager-sync.md +36 -0
  214. package/skills/literature/references/scopus-api-guide.md +208 -0
  215. package/skills/literature/references/wos-api-guide.md +308 -0
  216. package/skills/multi-perspective/SKILL.md +311 -0
  217. package/skills/multi-perspective/references/computational-many-analysts.md +77 -0
  218. package/skills/pipeline-manifest/SKILL.md +226 -0
  219. package/skills/pre-submission-report/SKILL.md +153 -0
  220. package/skills/process-reviews/SKILL.md +244 -0
  221. package/skills/process-reviews/references/rr-routing.md +101 -0
  222. package/skills/project-deck/SKILL.md +87 -0
  223. package/skills/project-safety/SKILL.md +135 -0
  224. package/skills/proofread/SKILL.md +254 -0
  225. package/skills/proofread/references/quality-rubric.md +104 -0
  226. package/skills/python-env/SKILL.md +57 -0
  227. package/skills/quarto-deck/SKILL.md +226 -0
  228. package/skills/quarto-deck/references/markdown-format.md +143 -0
  229. package/skills/quarto-deck/references/quality-rubric.md +54 -0
  230. package/skills/save-context/SKILL.md +174 -0
  231. package/skills/session-log/SKILL.md +98 -0
  232. package/skills/shared/concept-validation-gate.md +161 -0
  233. package/skills/shared/council-protocol.md +265 -0
  234. package/skills/shared/distribution-diagnostics.md +164 -0
  235. package/skills/shared/engagement-stratified-sampling.md +218 -0
  236. package/skills/shared/escalation-protocol.md +74 -0
  237. package/skills/shared/external-audit-protocol.md +205 -0
  238. package/skills/shared/intercoder-reliability.md +256 -0
  239. package/skills/shared/mcp-degradation.md +81 -0
  240. package/skills/shared/method-probing-questions.md +163 -0
  241. package/skills/shared/multi-language-conventions.md +143 -0
  242. package/skills/shared/paid-api-safety.md +174 -0
  243. package/skills/shared/palettes.md +90 -0
  244. package/skills/shared/progressive-disclosure.md +92 -0
  245. package/skills/shared/project-documentation-content.md +443 -0
  246. package/skills/shared/project-documentation-format.md +281 -0
  247. package/skills/shared/project-documentation.md +100 -0
  248. package/skills/shared/publication-output.md +138 -0
  249. package/skills/shared/quality-scoring.md +70 -0
  250. package/skills/shared/reference-resolution.md +77 -0
  251. package/skills/shared/research-quality-rubric.md +165 -0
  252. package/skills/shared/rhetoric-principles.md +54 -0
  253. package/skills/shared/skill-design-patterns.md +272 -0
  254. package/skills/shared/skill-index.md +240 -0
  255. package/skills/shared/system-documentation.md +334 -0
  256. package/skills/shared/tikz-rules.md +402 -0
  257. package/skills/shared/validation-tiers.md +121 -0
  258. package/skills/shared/venue-guides/README.md +46 -0
  259. package/skills/shared/venue-guides/cell_press_style.md +483 -0
  260. package/skills/shared/venue-guides/conferences_formatting.md +564 -0
  261. package/skills/shared/venue-guides/cs_conference_style.md +463 -0
  262. package/skills/shared/venue-guides/examples/cell_summary_example.md +247 -0
  263. package/skills/shared/venue-guides/examples/medical_structured_abstract.md +313 -0
  264. package/skills/shared/venue-guides/examples/nature_abstract_examples.md +213 -0
  265. package/skills/shared/venue-guides/examples/neurips_introduction_example.md +245 -0
  266. package/skills/shared/venue-guides/journals_formatting.md +486 -0
  267. package/skills/shared/venue-guides/medical_journal_styles.md +535 -0
  268. package/skills/shared/venue-guides/ml_conference_style.md +556 -0
  269. package/skills/shared/venue-guides/nature_science_style.md +405 -0
  270. package/skills/shared/venue-guides/reviewer_expectations.md +417 -0
  271. package/skills/shared/venue-guides/venue_writing_styles.md +321 -0
  272. package/skills/split-pdf/SKILL.md +172 -0
  273. package/skills/split-pdf/methodology.md +48 -0
  274. package/skills/sync-notion/SKILL.md +93 -0
  275. package/skills/system-audit/SKILL.md +157 -0
  276. package/skills/system-audit/references/sub-agent-prompts.md +294 -0
  277. package/skills/task-management/SKILL.md +131 -0
  278. package/skills/update-focus/SKILL.md +204 -0
  279. package/skills/update-project-doc/SKILL.md +194 -0
  280. package/skills/validate-bib/SKILL.md +242 -0
  281. package/skills/validate-bib/references/council-mode.md +34 -0
  282. package/skills/validate-bib/references/deep-verify.md +71 -0
  283. package/skills/validate-bib/references/openalex-verification.md +45 -0
  284. package/skills/validate-bib/references/preprint-check.md +31 -0
  285. package/skills/validate-bib/references/report-template.md +62 -0
@@ -0,0 +1,161 @@
1
+ # Concept Validation Gate
2
+
3
+ > Shared reference for writing and literature skills. Validates that a research concept is sufficiently developed before investing time in synthesis, drafting, or review. Adapted from CommScribe (Xu 2026).
4
+
5
+ ## Principle
6
+
7
+ **A weak concept produces a weak paper.** Validate the concept before proceeding to literature synthesis or drafting. This prevents wasted effort on poorly defined research questions, missing theoretical framing, or generic AI-sounding proposals.
8
+
9
+ ---
10
+
11
+ ## Validation Requirements
12
+
13
+ | Requirement | Minimum | Why It Matters |
14
+ |-------------|---------|----------------|
15
+ | **Word count** | 300 words | Demonstrates sufficient engagement with the idea |
16
+ | **Citations** | 3 references | Shows grounded knowledge, not speculation |
17
+ | **Research question** | Explicit, specific | Defines scope and testable contribution |
18
+ | **Theoretical framing** | Named framework or lens | Provides analytical structure |
19
+ | **Original voice** | Detected (not generic AI) | Ensures authentic intellectual engagement |
20
+
21
+ ---
22
+
23
+ ## What a Concept Plan Must Address
24
+
25
+ ### 1. Research Question
26
+
27
+ Specific, answerable, and falsifiable.
28
+
29
+ | Quality | Example |
30
+ |---------|---------|
31
+ | ❌ Too vague | "How does AI affect organisations?" |
32
+ | ❌ Too broad | "What is the impact of technology on decision-making?" |
33
+ | ✅ Specific | "How does the introduction of AI decision support change the weighting behaviour of expert panels in multi-criteria evaluation?" |
34
+
35
+ ### 2. Theoretical Framing
36
+
37
+ Name the theory, cite the source, explain how it applies.
38
+
39
+ | Quality | Example |
40
+ |---------|---------|
41
+ | ❌ Missing | "I'll look at decision-making." |
42
+ | ❌ Name-dropped | "This uses prospect theory." |
43
+ | ✅ Engaged | "Drawing on Kahneman and Tversky's (1979) prospect theory, I examine whether AI recommendations shift reference points in expert judgement, potentially amplifying loss aversion in high-stakes MCDM contexts." |
44
+
45
+ ### 3. Literature Context
46
+
47
+ What existing work does this build on or challenge?
48
+
49
+ | Quality | Example |
50
+ |---------|---------|
51
+ | ❌ Generic | "Many scholars have studied this." |
52
+ | ✅ Specific | "While Bansal et al. (2021) examined AI advice-taking in individual decisions, and Sunstein (2019) analysed group polarisation, neither addresses how AI interacts with structured multi-criteria processes where criteria weights are elicited." |
53
+
54
+ ### 4. Contribution Claim
55
+
56
+ What is genuinely new?
57
+
58
+ | Quality | Example |
59
+ |---------|---------|
60
+ | ❌ Vague | "This fills a gap in the literature." |
61
+ | ✅ Specific | "By embedding AI recommendations within a live AHP process, I isolate the mechanism through which AI shifts weight allocations — something prior work has theorised but not tested experimentally." |
62
+
63
+ ### 5. Scope Boundaries
64
+
65
+ What are you explicitly NOT covering?
66
+
67
+ **Example:** "Focus: expert panels in public sector procurement. Excluded: consumer-facing AI recommendations, autonomous systems without human oversight, non-MCDM decision frameworks."
68
+
69
+ ---
70
+
71
+ ## Depth Score
72
+
73
+ Beyond the checklist, assess intellectual depth (0.0–1.0):
74
+
75
+ ### Depth Indicators (presence increases score)
76
+
77
+ | Category | Markers |
78
+ |----------|---------|
79
+ | **Nuance** | "however", "although", "yet", "while", "despite", "conversely" |
80
+ | **Critical thinking** | "gap", "limitation", "critique", "overlooked", "underexplored", "tension" |
81
+ | **Engagement** | "argues", "suggests", "contends", "demonstrates", "challenges", "extends" |
82
+ | **Theory** | "framework", "lens", "perspective", "mechanism", "construct", "typology" |
83
+ | **Methodology** | "method", "approach", "design", "identification", "estimation", "measure" |
84
+
85
+ ### Scoring
86
+
87
+ - Count markers per category
88
+ - Normalise by word count (per 100 words)
89
+ - Weight: Nuance (0.25), Critical thinking (0.25), Engagement (0.20), Theory (0.15), Methodology (0.15)
90
+ - **Target:** depth_score ≥ 0.4
91
+
92
+ ### Interpretation
93
+
94
+ | Score | Assessment |
95
+ |-------|-----------|
96
+ | < 0.2 | Generic — likely AI-generated or insufficiently developed |
97
+ | 0.2–0.4 | Surface-level — needs more critical engagement |
98
+ | 0.4–0.6 | Adequate — meets minimum for proceeding |
99
+ | 0.6–0.8 | Strong — shows genuine intellectual engagement |
100
+ | > 0.8 | Excellent — deep, nuanced, ready for synthesis |
101
+
102
+ ---
103
+
104
+ ## Red Flags
105
+
106
+ Flag these phrases — they signal generic or AI-generated concepts:
107
+
108
+ | Red Flag | Problem |
109
+ |----------|---------|
110
+ | "This paper will explore..." | Generic opener |
111
+ | "In recent years..." | Filler, not specific |
112
+ | "With the rise of..." | Cliché |
113
+ | "The purpose of this research is to..." | Formulaic |
114
+ | "fills a gap in the literature" | Overused claim without specificity |
115
+ | "This is an important topic because..." | Assertion without evidence |
116
+ | "Much has been written about..." | Vague attribution |
117
+
118
+ **Response when detected:** "Your concept sounds generic. Use specific details from your reading to establish your voice and demonstrate genuine engagement with the literature."
119
+
120
+ ---
121
+
122
+ ## Validation Outcomes
123
+
124
+ ### ✅ PASS
125
+
126
+ All requirements met, depth_score ≥ 0.4, no red flags (or red flags are minor).
127
+
128
+ → Proceed to literature synthesis or drafting.
129
+
130
+ ### ⚠️ REVISE
131
+
132
+ 1–2 requirements unmet, or depth_score of at least 0.2 but below 0.4 (a score of exactly 0.4 meets the PASS target).
133
+
134
+ → Provide specific feedback on what to strengthen. Ask for revision before proceeding.
135
+
136
+ ### ❌ FAIL
137
+
138
+ 3+ requirements unmet or depth_score < 0.2.
139
+
140
+ → Concept is not ready. Suggest the user read more in the area, narrow the question, or identify a theoretical framework before returning.
141
+
142
+ ---
143
+
144
+ ## How Skills Use This
145
+
146
+ ### In `/literature` (before synthesis)
147
+
148
+ 1. Request concept plan from user
149
+ 2. Run validation checks
150
+ 3. If PASS → proceed to search and synthesis
151
+ 4. If REVISE/FAIL → return feedback, wait for revision
152
+
153
+ ### In writing skills (before drafting)
154
+ 1. Check if a validated concept exists (in project's `.planning/` or `CONCEPT.md`)
155
+ 2. If not → run validation gate before drafting
156
+ 3. If yes → use the concept to guide section structure
157
+
158
+ ### In review agents
159
+
160
+ 1. Check whether the paper's introduction meets concept validation standards
161
+ 2. A paper that would FAIL the concept gate has fundamental framing issues → flag as Critical
@@ -0,0 +1,265 @@
1
+ # Council Protocol
2
+
3
+ > Shared protocol for multi-model council mode. Any review agent or skill can opt into this by providing domain-specific system prompts and output formatting. This file defines the generic orchestration flow.
4
+ >
5
+ > **Included backend:** `cli-council` (local CLI tools, free with existing subscriptions). An optional API backend (`llm-council` via OpenRouter) is available separately — see below.
6
+
7
+ ## Core Concept: Cross-Model Agentic Invocation
8
+
9
+ Claude Code can invoke other LLM providers' CLI tools as subprocess reviewers — a different model reviews work that Claude produced, providing genuine architectural diversity. The system is **extensible**: any CLI tool that accepts a prompt and returns text can be wrapped as a backend (~20 lines of Python following the `BackendSpec` pattern in `packages/cli-council/`). Available backends change as subscriptions change; the architecture does not.
10
+
11
+ ## What Council Mode Is
12
+
13
+ Council mode coordinates this cross-model capability into a structured 3-stage deliberation:
14
+
15
+ 1. **Stage 1: Independent Assessments** — N models (typically 3, each from a different provider) independently evaluate the same artifact using the same instructions
16
+ 2. **Stage 2: Anonymised Peer Review** — each model evaluates the others' assessments without knowing which model produced which
17
+ 3. **Stage 3: Chairman Synthesis** — a chairman model reads everything and produces the final report
18
+
19
+ The key insight: genuine model diversity (different architectures, training data, biases) surfaces issues that any single model — or even multiple instances of the same model — would miss.
20
+
21
+ ## Infrastructure
22
+
23
+ ### CLI Backend: `cli-council` (Included)
24
+
25
+ Package: `packages/cli-council/`
26
+
27
+ - `CouncilRunner` — orchestrator that invokes CLI backends via subprocess
28
+ - Pluggable backends: `GeminiBackend`, `ClaudeBackend`, and a dormant `CodexBackend` (OpenAI subscription cancelled Mar 2026; resubscribing would restore it). New backends follow the same `BackendSpec` pattern.
29
+ - `CouncilResult` — Pydantic models for text-based results
30
+ - CLI — `python -m cli_council` for standalone use
31
+ - Uses existing subscriptions — no per-token API costs
32
+ - **Currently active backends:** Gemini (`gemini -p`), Claude (`claude -p`)
33
+ - **Best for:** Ad-hoc reviews, research tasks, quick multi-perspective opinions
34
+
35
+ ### API Backend: `llm-council` (Optional, Separate Install)
36
+
37
+ > Not included in this repo. Install separately: `pip install llm-council` or clone from GitHub.
38
+
39
+ - `LLMClient` — generic async OpenRouter client with JSON/text chat and retry logic
40
+ - `CouncilService` — 3-stage orchestration engine with customisable Stage 2/3 prompts
41
+ - `CouncilResult` — Pydantic models for structured JSON results
42
+ - CLI — `python -m llm_council` for standalone use
43
+ - Requires `OPENROUTER_API_KEY` in the environment
44
+ - **Best for:** Automated pipelines, structured JSON output, programmatic integration
45
+
46
+ ### Choosing a Backend
47
+
48
+ | Factor | `cli-council` (included) | `llm-council` (separate) |
49
+ |--------|--------------------------|--------------------------|
50
+ | Cost | Subscription-included | Per-token (OpenRouter) |
51
+ | Output format | Free-form text | Structured JSON |
52
+ | Reliability | Variable (CLI output parsing) | High (API contracts) |
53
+ | Speed | Slower (subprocess overhead) | Fast (parallel async HTTP) |
54
+ | Model control | Whatever CLIs support | Full OpenRouter catalogue |
55
+ | Offline | Partially (Claude -p works offline) | No |
56
+
57
+ **Default:** Use `cli-council` (included and free). Use `llm-council` only if you need structured JSON output or are running in an automated pipeline.
58
+
59
+ ## When to Use
60
+
61
+ - Pre-submission quality checks (high stakes)
62
+ - When thoroughness matters more than speed
63
+ - When the user explicitly requests "council mode", "council review", or "thorough review"
64
+ - Never the default — standard single-reviewer mode remains the default for all consumers
65
+
66
+ ## Parallel Independent Review
67
+
68
+ Beyond multi-model council mode, review agents can also be launched **in parallel** within a single Claude Code session for maximum coverage from different perspectives:
69
+
70
+ 1. **Pre-flight:** Launch `fatal-error-check` first (haiku model, ~15-30 seconds). If it returns FAIL, fix the fatal errors before proceeding.
71
+ 2. **Parallel launch:** If the pre-flight passes, launch all three review agents simultaneously in a **single message** with three Agent tool calls:
72
+ - `paper-critic` — adversarial LaTeX audit (grammar, notation, citation, tone, LaTeX, TikZ)
73
+ - `domain-reviewer` — substantive correctness (assumptions, derivations, citations, code-theory, backward logic)
74
+ - `referee2-reviewer` — full Reviewer 2 audit (identification, methods, robustness, presentation, scholarly rigour)
75
+ 3. **Synthesise:** Once all three agents return, run `/synthesise-reviews` to cross-reference issues, apply consensus escalation, and produce a unified `REVISION-PLAN.md`.
76
+
77
+ This pattern maximises coverage by combining complementary review perspectives. Each agent has different check dimensions and catches different classes of issues. Parallel launch saves time compared to sequential runs.
78
+
79
+ **When to use parallel review vs council mode:**
80
+
81
+ | Scenario | Use |
82
+ |----------|-----|
83
+ | Maximum coverage from different review perspectives | Parallel independent review |
84
+ | Model diversity (different LLM architectures finding different issues) | Council mode |
85
+ | Both perspectives AND model diversity | Parallel review first, then council mode on the most Critical workstream |
86
+ | Quick pre-submission check | Fatal-error-check only |
87
+
88
+ ## Prerequisites for a Consumer
89
+
90
+ An agent or skill that supports council mode must provide:
91
+
92
+ | What | Where | Purpose |
93
+ |------|-------|---------|
94
+ | **System prompt builder** | Consumer's `references/council-personas.md` | How to construct the system prompt sent to all models |
95
+ | **Output formatter** | Consumer's `references/council-prompts.md` | Stage 3 chairman prompt template + output format |
96
+ | **Council mode section** | Consumer's agent/skill body | Short section noting support + pointer to reference files |
97
+ | **Trigger phrases** | Consumer's frontmatter description/examples | How the user activates council mode |
98
+
99
+ ## Orchestration Protocol
100
+
101
+ The **main session** orchestrates council mode. Review agents cannot orchestrate themselves (they lack Bash). When council mode is triggered:
102
+
103
+ ### Pre-flight
104
+
105
+ 1. Run the consumer's standard pre-checks and hard gates
106
+ 2. If any gate fails, report immediately — do not invoke the council (save cost)
107
+ 3. Collect all source material (file contents, logs, rubrics) into a system prompt and user message
108
+ 4. Read the consumer's reference files for prompt construction guidance
109
+
110
+ ### Stage 1: Independent Assessments
111
+
112
+ The main session invokes the `llm-council` package (via CLI or Python script). The library:
113
+
114
+ 1. Sends the system prompt + user message to N different LLM models via OpenRouter
115
+ 2. Each model independently produces a JSON assessment
116
+ 3. All calls are parallel (async)
117
+ 4. Failed models are logged and skipped — the council proceeds with available responses
118
+
119
+ **Default models:** `anthropic/claude-sonnet-4.5`, `openai/gpt-5`, `google/gemini-2.5-pro`
120
+
121
+ ### Stage 2: Anonymised Peer Review
122
+
123
+ The library automatically:
124
+
125
+ 1. Labels Stage 1 assessments as "Assessment A", "Assessment B", etc. (anonymised)
126
+ 2. Sends all assessments to each model for cross-evaluation
127
+ 3. Each model evaluates the others' work, identifies agreements/disagreements, and provides a ranking
128
+ 4. Rankings are parsed and aggregated
129
+
130
+ **Models:** The same models as in Stage 1 (each one reviews the others' anonymised assessments).
131
+
132
+ ### Stage 3: Chairman Synthesis
133
+
134
+ The library:
135
+
136
+ 1. Sends all assessments and peer reviews to the chairman model
137
+ 2. The chairman considers all inputs and produces a single synthesised response
138
+ 3. The response follows the consumer's required output schema
139
+
140
+ **Default chairman:** `anthropic/claude-sonnet-4.5`
141
+
142
+ ### Write Output
143
+
144
+ The main session receives the `CouncilResult` JSON and formats it into the consumer's standard output (e.g., `CRITIC-REPORT.md` for paper-critic). The report uses the consumer's standard format with two sections appended:
145
+
146
+ ```markdown
147
+ ## Council Notes
148
+
149
+ ### Agreement Summary
150
+ - [N] issues confirmed by all reviewers
151
+ - [N] issues confirmed by majority
152
+ - [N] issues from single reviewer (validated in cross-review)
153
+ - [N] disputed issues (marked [DISPUTED])
154
+
155
+ ### Aggregate Rankings
156
+ | Assessment | Model | Avg Rank | Rankings Count |
157
+ |------------|-------|----------|----------------|
158
+ | Assessment A | [model name] | X.X | N |
159
+ | Assessment B | [model name] | X.X | N |
160
+ | Assessment C | [model name] | X.X | N |
161
+
162
+ ## Council Metadata
163
+ - **Mode:** Council ([N] models + peer review + chairman)
164
+ - **Models:** [list of model IDs used]
165
+ - **Chairman:** [chairman model ID]
166
+ - **Timing:** Stage 1: Xms, Stage 2: Xms, Stage 3: Xms, Total: Xms
167
+ - **Date:** YYYY-MM-DD
168
+ ```
169
+
170
+ These sections are appended **after** the consumer's standard report content. Downstream consumers (e.g., fixer agent) that parse only the standard sections are unaffected.
171
+
172
+ ## CLI Invocation
173
+
174
+ ### Option A: CLI Backend (`cli-council` — Included)
175
+
176
+ For ad-hoc reviews using existing subscriptions (no API cost):
177
+
178
+ ```bash
179
+ cd "packages/cli-council"
180
+ uv run python -m cli_council \
181
+ --prompt-file /tmp/council-prompt.txt \
182
+ --context-file /tmp/council-context.txt \
183
+ --output /tmp/council-result.json \
184
+ --output-md /tmp/council-report.md \
185
+ --chairman claude \
186
+ --timeout 180
187
+ ```
188
+
189
+ - Write the paper content / review instructions to `--context-file`, and the specific question to `--prompt-file`
190
+ - Output is free-form text — the markdown report (`--output-md`) is usually more useful than JSON
191
+ - The chairman backend defaults to `claude` (since we're already in Claude Code)
192
+
193
+ ### Option B: API Backend (`llm-council` — Separate Install)
194
+
195
+ > Requires separate installation: `pip install llm-council` and an `OPENROUTER_API_KEY`.
196
+
197
+ For structured JSON output and automated pipelines:
198
+
199
+ ```bash
200
+ uv run python -m llm_council \
201
+ --system-prompt-file /tmp/council-system.txt \
202
+ --user-message-file /tmp/council-user.txt \
203
+ --models "anthropic/claude-sonnet-4.5,openai/gpt-5,google/gemini-2.5-pro" \
204
+ --chairman "anthropic/claude-sonnet-4.5" \
205
+ --output /tmp/council-result.json
206
+ ```
207
+
208
+ For advanced cases (custom Stage 2/3 prompts), write a small Python script that imports `llm_council` and calls `CouncilService.run_council()` with `stage2_system` and `stage3_prompt_builder` parameters.
209
+
210
+ ## Issue Resolution Rules (Chairman)
211
+
212
+ The consumer's chairman prompt should instruct the chairman to apply these rules:
213
+
214
+ | Situation | Action |
215
+ |-----------|--------|
216
+ | Issue confirmed by 2+ models | Retain at the **highest** agreed severity |
217
+ | Issue from 1 model, validated in peer review | Retain at the original severity |
218
+ | Issue from 1 model, disputed in peer review | Retain with `[DISPUTED]` tag; chairman makes final severity call |
219
+ | Issue found only in peer review (missed initially) | Add as a new finding |
220
+ | Conflicting severity assessments | Chairman decides; notes the range in the issue description |
221
+
222
+ **Scoring:** The chairman produces an independent score informed by all inputs — not a mechanical average.
223
+
224
+ ## Model Configuration
225
+
226
+ | Parameter | Built-in Default | Override |
227
+ |-----------|-----------------|---------|
228
+ | Stage 1 models | `anthropic/claude-sonnet-4.5`, `openai/gpt-5`, `google/gemini-2.5-pro` | `--models` CLI flag or user config |
229
+ | Chairman model | `anthropic/claude-sonnet-4.5` | `--chairman` CLI flag or user config |
230
+ | Max tokens | 4096 | `--max-tokens` CLI flag |
231
+
232
+ **User defaults** persist to `~/.config/llm-council/config.json` and override built-in defaults. Manage via `llm-council models --set-defaults` / `--set-chairman` / `--reset`, or interactively with `llm-council models --pricing` to review options first.
233
+
234
+ The library's `config.py` contains the full model registry (17 models across Anthropic, OpenAI, Google) with tiers and live pricing.
235
+
236
+ ## Cost Considerations
237
+
238
+ Council mode costs significantly more than standard mode because it calls N models for Stage 1, N models for Stage 2, and 1 model for Stage 3 (total: 2N+1 API calls). With 3 models:
239
+
240
+ - **Standard mode:** 1 agent call (free — uses Claude Code context)
241
+ - **Council mode:** 7 OpenRouter API calls (3 + 3 + 1)
242
+
243
+ Pricing depends on the models chosen. Check OpenRouter for current rates. Use council mode when thoroughness justifies the cost — typically pre-submission or high-stakes reviews.
244
+
245
+ ## Persona Support (Optional)
246
+
247
+ Each consumer can define **personas** in `references/council-personas.md` — distinct reviewer emphases that are prepended to the system prompt. Since council mode already uses different LLM providers (which bring natural perspective diversity), personas are optional but can add further differentiation.
248
+
249
+ Current approach: the same system prompt goes to all models. Personas are documented as reference material describing what each model *tends to focus on* based on its architecture. Future extension: per-model system prompt variants via the library's API.
250
+
251
+ ## Consumers
252
+
253
+ | Consumer | CLI (`cli-council`) | API (`llm-council`) | Notes |
254
+ |----------|---------------------|---------------------|-------|
255
+ | `paper-critic` | Supported | Implemented | First consumer — Technical Rigour, Presentation, Scholarly Standards personas |
256
+ | `referee2-reviewer` | Supported | Supported | 5-audit protocol + council cross-review — highest-value consumer |
257
+ | `domain-reviewer` | Supported | — | Math/assumption checking — different models catch different derivation gaps |
258
+ | `proposal-reviewer` | Supported | — | Feasibility and novelty — different models have different domain knowledge |
259
+ | `peer-reviewer` | Supported | — | Full paper review — the canonical use case for multi-model deliberation |
260
+ | `multi-perspective` | Supported | — | Replaces Claude-only sub-agents with genuine model diversity |
261
+ | `literature` | Implemented | — | Phase 2b (search) and Phase 7 (synthesis) — see skill definition |
262
+ | `devils-advocate` | Supported | — | Round 1/2/3 played by different models for genuine adversarial tension |
263
+ | `proofread` | Supported | — | Lower value — most useful for notation consistency and citation voice balance |
264
+ | `code-review` | Supported | — | Most valuable for domain correctness and cross-language verification |
265
+ | `bib-validate` | Supported | — | Different models have different bibliographic knowledge — catches metadata mismatches |
@@ -0,0 +1,164 @@
1
+ # Distribution Diagnostics Before Model Selection
2
+
3
+ > Shared reference for `/data-analysis` and review agents. Mandatory checks on dependent variables before selecting a statistical model. Prevents misspecification. Adapted from CommDAAF AgentAcademy protocol (Xu 2026).
4
+
5
+ ## Principle
6
+
7
+ **Never run a regression without inspecting the DV distribution first.** OLS on count data, Poisson on overdispersed data, and linear models on zero-inflated outcomes all produce misleading results. Five minutes of diagnostics prevents weeks of wasted analysis.
8
+
9
+ ---
10
+
11
+ ## Mandatory Checks
12
+
13
+ Run these on every dependent variable before model selection:
14
+
15
+ | Diagnostic | What to compute | Why it matters |
16
+ |-----------|----------------|----------------|
17
+ | **Basic stats** | N, mean, median, SD, range | Understand the variable |
18
+ | **Skewness** | `scipy.stats.skew(y)` or `moments::skewness(y)` | \|skew\| > 1 → OLS assumptions likely violated |
19
+ | **Zero proportion** | `sum(y == 0) / N` | > 15% zeros → start considering zero-inflated or hurdle models; > 30% is the cut-off used in the decision tree below |
20
+ | **Overdispersion** | `var(y) / mean(y)` | > 1.5 → the variance exceeds the mean, so Poisson (which assumes variance = mean) is misspecified; use Negative Binomial |
21
+ | **Normality** | QQ-plot + Shapiro-Wilk (if N ≤ 5000) | Shapiro-Wilk is the formal test, but visual inspection of the QQ-plot matters more |
22
+ | **Outliers** | IQR method or robust Mahalanobis distance | Extreme values can dominate OLS estimates |
23
+
24
+ ---
25
+
26
+ ## Model Selection Decision Tree
27
+
28
+ ```
29
+ Is the DV a count (0, 1, 2, ...)?
30
+ ├── Yes → Check overdispersion (var/mean > 1.5?)
31
+ │ ├── Yes → Check zero proportion (> 30%?)
32
+ │ │ ├── Yes → Zero-inflated NB or Hurdle model
33
+ │ │ └── No → Negative Binomial
34
+ │ └── No → Check zero proportion (> 30%?)
35
+ │ ├── Yes → Zero-inflated Poisson
36
+ │ └── No → Poisson
37
+ ├── Is the DV a proportion or bounded [0, 1]?
38
+ │ └── Yes → Beta regression (or fractional logit)
39
+ ├── Is the DV binary (0/1)?
40
+ │ └── Yes → Logistic regression
41
+ ├── Is the DV ordinal (ordered categories)?
42
+ │ └── Yes → Ordered logistic/probit
43
+ └── Is the DV continuous?
44
+ └── Check skewness and normality of residuals
45
+ ├── Residuals ~normal → OLS
46
+ ├── Highly skewed DV → Log-transform, then OLS (report both)
47
+ └── Heavy tails → Robust regression or quantile regression
48
+ ```
49
+
50
+ **Key rule:** Never use OLS on raw counts without explicit justification. Social media engagement, citation counts, survey response counts — these are almost never normally distributed.
51
+
52
+ ---
53
+
54
+ ## Effect Size Reporting
55
+
56
+ ### For count models (NB, Poisson): report Incidence Rate Ratios (IRR)
57
+
58
+ | IRR | Interpretation |
59
+ |-----|---------------|
60
+ | 1.0 | No effect |
61
+ | 1.2 | 20% increase |
62
+ | 1.5 | 50% increase |
63
+ | 2.0 | Double |
64
+ | 0.5 | Half |
65
+
66
+ **Always translate to practical meaning:** "Posts with frame X received 50% more engagement (IRR = 1.50, 95% CI [1.22, 1.84])" — not just "β = 0.41, p < 0.01".
67
+
68
+ ### For OLS: report standardised coefficients alongside raw
69
+
70
+ Standardised coefficients help readers judge the magnitude of an effect, not just its statistical significance.
71
+
72
+ ### For logistic: report odds ratios AND predicted probabilities
73
+
74
+ Odds ratios are hard to interpret. Show predicted probability at meaningful values of the IV.
75
+
76
+ ---
77
+
78
+ ## Multiple Testing
79
+
80
+ When testing multiple predictors or outcomes:
81
+
82
+ | Method | When to use |
83
+ |--------|------------|
84
+ | **Bonferroni** | Conservative; few tests (< 10) |
85
+ | **Holm** | Less conservative than Bonferroni while still controlling the family-wise error rate; step-down sequential rejection |
86
+ | **FDR (Benjamini-Hochberg)** | Many tests (≥ 10); controls false discovery rate |
87
+
88
+ **Always report both raw and adjusted p-values** so that readers can assess for themselves how sensitive each result is to the correction.
89
+
90
+ ---
91
+
92
+ ## Implementation
93
+
94
+ ### Python
95
+
96
+ ```python
97
+ import numpy as np
98
+ from scipy import stats
99
+
100
+ def distribution_diagnostics(y, name="DV"):
101
+ """Run mandatory diagnostics before model selection."""
102
+ n = len(y)
103
+ skewness = stats.skew(y)
104
+ pct_zeros = np.sum(y == 0) / n * 100
105
+ var_mean = np.var(y) / np.mean(y) if np.mean(y) > 0 else float('inf')
106
+
107
+ diagnostics = {
108
+ 'n': n, 'mean': np.mean(y), 'median': np.median(y),
109
+ 'sd': np.std(y), 'skewness': skewness,
110
+ 'pct_zeros': pct_zeros, 'var_mean_ratio': var_mean,
111
+ }
112
+
113
+ # Model recommendation
114
+ if pct_zeros > 30:
115
+ diagnostics['recommendation'] = 'Zero-inflated model or Hurdle'
116
+ elif var_mean > 1.5:
117
+ diagnostics['recommendation'] = 'Negative Binomial'
118
+ elif abs(skewness) > 1:
119
+ diagnostics['recommendation'] = 'Log-transform or GLM'
120
+ else:
121
+ diagnostics['recommendation'] = 'OLS (verify residuals)'
122
+
123
+ return diagnostics
124
+ ```
125
+
126
+ ### R
127
+
128
+ ```r
129
+ distribution_diagnostics <- function(y, name = "DV") {
130
+ n <- length(y)
131
+ skew <- moments::skewness(y)
132
+ pct_zeros <- sum(y == 0) / n * 100
133
+ var_mean <- var(y) / mean(y)
134
+
135
+ cat(sprintf("=== %s (N=%d) ===\n", name, n))
136
+ cat(sprintf("Mean: %.3f | Median: %.3f | SD: %.3f\n", mean(y), median(y), sd(y)))
137
+ cat(sprintf("Skewness: %.3f | Zeros: %.1f%% | Var/Mean: %.3f\n", skew, pct_zeros, var_mean))
138
+
139
+ if (pct_zeros > 30) cat("→ Zero-inflated or Hurdle model\n")
140
+ else if (var_mean > 1.5) cat("→ Negative Binomial\n")
141
+ else if (abs(skew) > 1) cat("→ Log-transform or GLM\n")
142
+ else cat("→ OLS (verify residuals)\n")
143
+ }
144
+ ```
145
+
146
+ ---
147
+
148
+ ## Integration
149
+
150
+ ### In `/data-analysis` Phase 1 (EDA)
151
+
152
+ Run `distribution_diagnostics()` on every DV and key IVs before proceeding to estimation. If the diagnostics suggest a non-OLS model, flag this before the user locks their specification.
153
+
154
+ ### In review agents
155
+
156
+ Check whether the paper reports distribution diagnostics or justifies model choice. A paper using OLS on count data without justification → flag as Major issue.
157
+
158
+ ### Validation tier interaction
159
+
160
+ | Tier | Requirement |
161
+ |------|------------|
162
+ | 🟢 Exploratory | Run diagnostics, note recommendation |
163
+ | 🟡 Pilot | Run diagnostics, justify model choice in notes |
164
+ | 🔴 Publication | Run diagnostics, report in paper, compare 2+ model families |