flonat-research 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/domain-reviewer.md +336 -0
- package/.claude/agents/fixer.md +226 -0
- package/.claude/agents/paper-critic.md +370 -0
- package/.claude/agents/peer-reviewer.md +289 -0
- package/.claude/agents/proposal-reviewer.md +215 -0
- package/.claude/agents/referee2-reviewer.md +367 -0
- package/.claude/agents/references/journal-referee-profiles.md +354 -0
- package/.claude/agents/references/paper-critic/council-personas.md +77 -0
- package/.claude/agents/references/paper-critic/council-prompts.md +198 -0
- package/.claude/agents/references/peer-reviewer/report-template.md +199 -0
- package/.claude/agents/references/peer-reviewer/sa-prompts.md +260 -0
- package/.claude/agents/references/peer-reviewer/security-scan.md +188 -0
- package/.claude/agents/references/proposal-reviewer/report-template.md +144 -0
- package/.claude/agents/references/proposal-reviewer/sa-prompts.md +149 -0
- package/.claude/agents/references/referee-config.md +114 -0
- package/.claude/agents/references/referee2-reviewer/audit-checklists.md +287 -0
- package/.claude/agents/references/referee2-reviewer/report-template.md +334 -0
- package/.claude/rules/design-before-results.md +52 -0
- package/.claude/rules/ignore-agents-md.md +17 -0
- package/.claude/rules/ignore-gemini-md.md +17 -0
- package/.claude/rules/lean-claude-md.md +45 -0
- package/.claude/rules/learn-tags.md +99 -0
- package/.claude/rules/overleaf-separation.md +67 -0
- package/.claude/rules/plan-first.md +175 -0
- package/.claude/rules/read-docs-first.md +50 -0
- package/.claude/rules/scope-discipline.md +28 -0
- package/.claude/settings.json +125 -0
- package/.context/current-focus.md +33 -0
- package/.context/preferences/priorities.md +36 -0
- package/.context/preferences/task-naming.md +28 -0
- package/.context/profile.md +29 -0
- package/.context/projects/_index.md +41 -0
- package/.context/projects/papers/nudge-exp.md +22 -0
- package/.context/projects/papers/uncertainty.md +31 -0
- package/.context/resources/claude-scientific-writer-review.md +48 -0
- package/.context/resources/cunningham-multi-analyst-agents.md +104 -0
- package/.context/resources/cunningham-multilang-code-audit.md +62 -0
- package/.context/resources/google-ai-co-scientist-review.md +72 -0
- package/.context/resources/karpathy-llm-council-review.md +58 -0
- package/.context/resources/multi-coder-reliability-protocol.md +175 -0
- package/.context/resources/pedro-santanna-takeaways.md +96 -0
- package/.context/resources/venue-rankings/abs_ajg_2024.csv +1823 -0
- package/.context/resources/venue-rankings/abs_ajg_2024_econ.csv +356 -0
- package/.context/resources/venue-rankings/cabs_4_4star_theory.csv +40 -0
- package/.context/resources/venue-rankings/core_2026.csv +801 -0
- package/.context/resources/venue-rankings.md +147 -0
- package/.context/workflows/README.md +69 -0
- package/.context/workflows/daily-review.md +91 -0
- package/.context/workflows/meeting-actions.md +108 -0
- package/.context/workflows/replication-protocol.md +155 -0
- package/.context/workflows/weekly-review.md +113 -0
- package/.mcp-server-biblio/formatters.py +158 -0
- package/.mcp-server-biblio/pyproject.toml +11 -0
- package/.mcp-server-biblio/server.py +678 -0
- package/.mcp-server-biblio/sources/__init__.py +14 -0
- package/.mcp-server-biblio/sources/base.py +73 -0
- package/.mcp-server-biblio/sources/formatters.py +83 -0
- package/.mcp-server-biblio/sources/models.py +22 -0
- package/.mcp-server-biblio/sources/multi_source.py +243 -0
- package/.mcp-server-biblio/sources/openalex_source.py +183 -0
- package/.mcp-server-biblio/sources/scopus_source.py +309 -0
- package/.mcp-server-biblio/sources/wos_source.py +508 -0
- package/.mcp-server-biblio/uv.lock +896 -0
- package/.scripts/README.md +161 -0
- package/.scripts/ai_pattern_density.py +446 -0
- package/.scripts/conf +445 -0
- package/.scripts/config.py +122 -0
- package/.scripts/count_inventory.py +275 -0
- package/.scripts/daily_digest.py +288 -0
- package/.scripts/done +177 -0
- package/.scripts/extract_meeting_actions.py +223 -0
- package/.scripts/focus +176 -0
- package/.scripts/generate-codex-agents-md.py +217 -0
- package/.scripts/inbox +194 -0
- package/.scripts/notion_helpers.py +325 -0
- package/.scripts/openalex/query_helpers.py +306 -0
- package/.scripts/papers +227 -0
- package/.scripts/query +223 -0
- package/.scripts/session-history.py +201 -0
- package/.scripts/skill-health.py +516 -0
- package/.scripts/skill-log-miner.py +273 -0
- package/.scripts/sync-to-codex.sh +252 -0
- package/.scripts/task +213 -0
- package/.scripts/tasks +190 -0
- package/.scripts/week +206 -0
- package/CLAUDE.md +197 -0
- package/LICENSE +21 -0
- package/MEMORY.md +38 -0
- package/README.md +269 -0
- package/docs/agents.md +44 -0
- package/docs/bibliography-setup.md +55 -0
- package/docs/council-mode.md +36 -0
- package/docs/getting-started.md +245 -0
- package/docs/hooks.md +38 -0
- package/docs/mcp-servers.md +82 -0
- package/docs/notion-setup.md +109 -0
- package/docs/rules.md +33 -0
- package/docs/scripts.md +303 -0
- package/docs/setup-overview/setup-overview.pdf +0 -0
- package/docs/skills.md +70 -0
- package/docs/system.md +159 -0
- package/hooks/block-destructive-git.sh +66 -0
- package/hooks/context-monitor.py +114 -0
- package/hooks/postcompact-restore.py +157 -0
- package/hooks/precompact-autosave.py +181 -0
- package/hooks/promise-checker.sh +124 -0
- package/hooks/protect-source-files.sh +81 -0
- package/hooks/resume-context-loader.sh +53 -0
- package/hooks/startup-context-loader.sh +102 -0
- package/package.json +51 -0
- package/packages/cli-council/.github/workflows/claude-code-review.yml +44 -0
- package/packages/cli-council/.github/workflows/claude.yml +50 -0
- package/packages/cli-council/README.md +100 -0
- package/packages/cli-council/pyproject.toml +43 -0
- package/packages/cli-council/src/cli_council/__init__.py +19 -0
- package/packages/cli-council/src/cli_council/__main__.py +185 -0
- package/packages/cli-council/src/cli_council/backends/__init__.py +8 -0
- package/packages/cli-council/src/cli_council/backends/base.py +81 -0
- package/packages/cli-council/src/cli_council/backends/claude.py +25 -0
- package/packages/cli-council/src/cli_council/backends/codex.py +27 -0
- package/packages/cli-council/src/cli_council/backends/gemini.py +26 -0
- package/packages/cli-council/src/cli_council/checkpoint.py +212 -0
- package/packages/cli-council/src/cli_council/config.py +51 -0
- package/packages/cli-council/src/cli_council/council.py +391 -0
- package/packages/cli-council/src/cli_council/models.py +46 -0
- package/packages/llm-council/.github/workflows/claude-code-review.yml +44 -0
- package/packages/llm-council/.github/workflows/claude.yml +50 -0
- package/packages/llm-council/README.md +453 -0
- package/packages/llm-council/pyproject.toml +42 -0
- package/packages/llm-council/src/llm_council/__init__.py +23 -0
- package/packages/llm-council/src/llm_council/__main__.py +259 -0
- package/packages/llm-council/src/llm_council/checkpoint.py +193 -0
- package/packages/llm-council/src/llm_council/client.py +253 -0
- package/packages/llm-council/src/llm_council/config.py +232 -0
- package/packages/llm-council/src/llm_council/council.py +482 -0
- package/packages/llm-council/src/llm_council/models.py +46 -0
- package/packages/mcp-bibliography/MEMORY.md +31 -0
- package/packages/mcp-bibliography/_app.py +226 -0
- package/packages/mcp-bibliography/formatters.py +158 -0
- package/packages/mcp-bibliography/log/2026-03-13-2100.md +35 -0
- package/packages/mcp-bibliography/pyproject.toml +15 -0
- package/packages/mcp-bibliography/run.sh +20 -0
- package/packages/mcp-bibliography/scholarly_formatters.py +83 -0
- package/packages/mcp-bibliography/server.py +1857 -0
- package/packages/mcp-bibliography/tools/__init__.py +28 -0
- package/packages/mcp-bibliography/tools/_registry.py +19 -0
- package/packages/mcp-bibliography/tools/altmetric.py +107 -0
- package/packages/mcp-bibliography/tools/core.py +92 -0
- package/packages/mcp-bibliography/tools/dblp.py +52 -0
- package/packages/mcp-bibliography/tools/openalex.py +296 -0
- package/packages/mcp-bibliography/tools/opencitations.py +102 -0
- package/packages/mcp-bibliography/tools/openreview.py +179 -0
- package/packages/mcp-bibliography/tools/orcid.py +131 -0
- package/packages/mcp-bibliography/tools/scholarly.py +575 -0
- package/packages/mcp-bibliography/tools/unpaywall.py +63 -0
- package/packages/mcp-bibliography/tools/zenodo.py +123 -0
- package/packages/mcp-bibliography/uv.lock +711 -0
- package/scripts/setup.sh +143 -0
- package/skills/beamer-deck/SKILL.md +199 -0
- package/skills/beamer-deck/references/quality-rubric.md +54 -0
- package/skills/beamer-deck/references/review-prompts.md +106 -0
- package/skills/bib-validate/SKILL.md +261 -0
- package/skills/bib-validate/references/council-mode.md +34 -0
- package/skills/bib-validate/references/deep-verify.md +79 -0
- package/skills/bib-validate/references/fix-mode.md +36 -0
- package/skills/bib-validate/references/openalex-verification.md +45 -0
- package/skills/bib-validate/references/preprint-check.md +31 -0
- package/skills/bib-validate/references/ref-manager-crossref.md +41 -0
- package/skills/bib-validate/references/report-template.md +82 -0
- package/skills/code-archaeology/SKILL.md +141 -0
- package/skills/code-review/SKILL.md +265 -0
- package/skills/code-review/references/quality-rubric.md +67 -0
- package/skills/consolidate-memory/SKILL.md +208 -0
- package/skills/context-status/SKILL.md +126 -0
- package/skills/creation-guard/SKILL.md +230 -0
- package/skills/devils-advocate/SKILL.md +130 -0
- package/skills/devils-advocate/references/competing-hypotheses.md +83 -0
- package/skills/init-project/SKILL.md +115 -0
- package/skills/init-project-course/references/memory-and-settings.md +92 -0
- package/skills/init-project-course/references/organise-templates.md +94 -0
- package/skills/init-project-course/skill.md +147 -0
- package/skills/init-project-light/skill.md +139 -0
- package/skills/init-project-research/SKILL.md +368 -0
- package/skills/init-project-research/references/atlas-pipeline-sync.md +70 -0
- package/skills/init-project-research/references/atlas-schema.md +81 -0
- package/skills/init-project-research/references/confirmation-report.md +39 -0
- package/skills/init-project-research/references/domain-profile-template.md +104 -0
- package/skills/init-project-research/references/interview-round3.md +34 -0
- package/skills/init-project-research/references/literature-discovery.md +43 -0
- package/skills/init-project-research/references/scaffold-details.md +197 -0
- package/skills/init-project-research/templates/field-calibration.md +60 -0
- package/skills/init-project-research/templates/pipeline-manifest.md +63 -0
- package/skills/init-project-research/templates/run-all.sh +116 -0
- package/skills/init-project-research/templates/seed-files.md +337 -0
- package/skills/insights-deck/SKILL.md +151 -0
- package/skills/interview-me/SKILL.md +157 -0
- package/skills/latex/SKILL.md +141 -0
- package/skills/latex/references/latex-configs.md +183 -0
- package/skills/latex-autofix/SKILL.md +230 -0
- package/skills/latex-autofix/references/known-errors.md +183 -0
- package/skills/latex-autofix/references/quality-rubric.md +50 -0
- package/skills/latex-health-check/SKILL.md +161 -0
- package/skills/learn/SKILL.md +220 -0
- package/skills/learn/scripts/validate_skill.py +265 -0
- package/skills/lessons-learned/SKILL.md +201 -0
- package/skills/literature/SKILL.md +335 -0
- package/skills/literature/references/agent-templates.md +393 -0
- package/skills/literature/references/bibliometric-apis.md +44 -0
- package/skills/literature/references/cli-council-search.md +79 -0
- package/skills/literature/references/openalex-api-guide.md +371 -0
- package/skills/literature/references/openalex-common-queries.md +381 -0
- package/skills/literature/references/openalex-workflows.md +248 -0
- package/skills/literature/references/reference-manager-sync.md +36 -0
- package/skills/literature/references/scopus-api-guide.md +208 -0
- package/skills/literature/references/wos-api-guide.md +308 -0
- package/skills/multi-perspective/SKILL.md +311 -0
- package/skills/multi-perspective/references/computational-many-analysts.md +77 -0
- package/skills/pipeline-manifest/SKILL.md +226 -0
- package/skills/pre-submission-report/SKILL.md +153 -0
- package/skills/process-reviews/SKILL.md +244 -0
- package/skills/process-reviews/references/rr-routing.md +101 -0
- package/skills/project-deck/SKILL.md +87 -0
- package/skills/project-safety/SKILL.md +135 -0
- package/skills/proofread/SKILL.md +254 -0
- package/skills/proofread/references/quality-rubric.md +104 -0
- package/skills/python-env/SKILL.md +57 -0
- package/skills/quarto-deck/SKILL.md +226 -0
- package/skills/quarto-deck/references/markdown-format.md +143 -0
- package/skills/quarto-deck/references/quality-rubric.md +54 -0
- package/skills/save-context/SKILL.md +174 -0
- package/skills/session-log/SKILL.md +98 -0
- package/skills/shared/concept-validation-gate.md +161 -0
- package/skills/shared/council-protocol.md +265 -0
- package/skills/shared/distribution-diagnostics.md +164 -0
- package/skills/shared/engagement-stratified-sampling.md +218 -0
- package/skills/shared/escalation-protocol.md +74 -0
- package/skills/shared/external-audit-protocol.md +205 -0
- package/skills/shared/intercoder-reliability.md +256 -0
- package/skills/shared/mcp-degradation.md +81 -0
- package/skills/shared/method-probing-questions.md +163 -0
- package/skills/shared/multi-language-conventions.md +143 -0
- package/skills/shared/paid-api-safety.md +174 -0
- package/skills/shared/palettes.md +90 -0
- package/skills/shared/progressive-disclosure.md +92 -0
- package/skills/shared/project-documentation-content.md +443 -0
- package/skills/shared/project-documentation-format.md +281 -0
- package/skills/shared/project-documentation.md +100 -0
- package/skills/shared/publication-output.md +138 -0
- package/skills/shared/quality-scoring.md +70 -0
- package/skills/shared/reference-resolution.md +77 -0
- package/skills/shared/research-quality-rubric.md +165 -0
- package/skills/shared/rhetoric-principles.md +54 -0
- package/skills/shared/skill-design-patterns.md +272 -0
- package/skills/shared/skill-index.md +240 -0
- package/skills/shared/system-documentation.md +334 -0
- package/skills/shared/tikz-rules.md +402 -0
- package/skills/shared/validation-tiers.md +121 -0
- package/skills/shared/venue-guides/README.md +46 -0
- package/skills/shared/venue-guides/cell_press_style.md +483 -0
- package/skills/shared/venue-guides/conferences_formatting.md +564 -0
- package/skills/shared/venue-guides/cs_conference_style.md +463 -0
- package/skills/shared/venue-guides/examples/cell_summary_example.md +247 -0
- package/skills/shared/venue-guides/examples/medical_structured_abstract.md +313 -0
- package/skills/shared/venue-guides/examples/nature_abstract_examples.md +213 -0
- package/skills/shared/venue-guides/examples/neurips_introduction_example.md +245 -0
- package/skills/shared/venue-guides/journals_formatting.md +486 -0
- package/skills/shared/venue-guides/medical_journal_styles.md +535 -0
- package/skills/shared/venue-guides/ml_conference_style.md +556 -0
- package/skills/shared/venue-guides/nature_science_style.md +405 -0
- package/skills/shared/venue-guides/reviewer_expectations.md +417 -0
- package/skills/shared/venue-guides/venue_writing_styles.md +321 -0
- package/skills/split-pdf/SKILL.md +172 -0
- package/skills/split-pdf/methodology.md +48 -0
- package/skills/sync-notion/SKILL.md +93 -0
- package/skills/system-audit/SKILL.md +157 -0
- package/skills/system-audit/references/sub-agent-prompts.md +294 -0
- package/skills/task-management/SKILL.md +131 -0
- package/skills/update-focus/SKILL.md +204 -0
- package/skills/update-project-doc/SKILL.md +194 -0
- package/skills/validate-bib/SKILL.md +242 -0
- package/skills/validate-bib/references/council-mode.md +34 -0
- package/skills/validate-bib/references/deep-verify.md +71 -0
- package/skills/validate-bib/references/openalex-verification.md +45 -0
- package/skills/validate-bib/references/preprint-check.md +31 -0
- package/skills/validate-bib/references/report-template.md +62 -0
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# Concept Validation Gate
|
|
2
|
+
|
|
3
|
+
> Shared reference for writing and literature skills. Validates that a research concept is sufficiently developed before investing time in synthesis, drafting, or review. Adapted from CommScribe (Xu 2026).
|
|
4
|
+
|
|
5
|
+
## Principle
|
|
6
|
+
|
|
7
|
+
**A weak concept produces a weak paper.** Validate the concept before proceeding to literature synthesis or drafting. This prevents wasted effort on poorly defined research questions, missing theoretical framing, or generic AI-sounding proposals.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Validation Requirements
|
|
12
|
+
|
|
13
|
+
| Requirement | Minimum | Why It Matters |
|
|
14
|
+
|-------------|---------|----------------|
|
|
15
|
+
| **Word count** | 300 words | Demonstrates sufficient engagement with the idea |
|
|
16
|
+
| **Citations** | 3 references | Shows grounded knowledge, not speculation |
|
|
17
|
+
| **Research question** | Explicit, specific | Defines scope and testable contribution |
|
|
18
|
+
| **Theoretical framing** | Named framework or lens | Provides analytical structure |
|
|
19
|
+
| **Original voice** | Detected (not generic AI) | Ensures authentic intellectual engagement |
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## What a Concept Plan Must Address
|
|
24
|
+
|
|
25
|
+
### 1. Research Question
|
|
26
|
+
|
|
27
|
+
Specific, answerable, and falsifiable.
|
|
28
|
+
|
|
29
|
+
| Quality | Example |
|
|
30
|
+
|---------|---------|
|
|
31
|
+
| ❌ Too vague | "How does AI affect organisations?" |
|
|
32
|
+
| ❌ Too broad | "What is the impact of technology on decision-making?" |
|
|
33
|
+
| ✅ Specific | "How does the introduction of AI decision support change the weighting behaviour of expert panels in multi-criteria evaluation?" |
|
|
34
|
+
|
|
35
|
+
### 2. Theoretical Framing
|
|
36
|
+
|
|
37
|
+
Name the theory, cite the source, explain how it applies.
|
|
38
|
+
|
|
39
|
+
| Quality | Example |
|
|
40
|
+
|---------|---------|
|
|
41
|
+
| ❌ Missing | "I'll look at decision-making." |
|
|
42
|
+
| ❌ Name-dropped | "This uses prospect theory." |
|
|
43
|
+
| ✅ Engaged | "Drawing on Kahneman and Tversky's (1979) prospect theory, I examine whether AI recommendations shift reference points in expert judgement, potentially amplifying loss aversion in high-stakes MCDM contexts." |
|
|
44
|
+
|
|
45
|
+
### 3. Literature Context
|
|
46
|
+
|
|
47
|
+
What existing work does this build on or challenge?
|
|
48
|
+
|
|
49
|
+
| Quality | Example |
|
|
50
|
+
|---------|---------|
|
|
51
|
+
| ❌ Generic | "Many scholars have studied this." |
|
|
52
|
+
| ✅ Specific | "While Bansal et al. (2021) examined AI advice-taking in individual decisions, and Sunstein (2019) analysed group polarisation, neither addresses how AI interacts with structured multi-criteria processes where criteria weights are elicited." |
|
|
53
|
+
|
|
54
|
+
### 4. Contribution Claim
|
|
55
|
+
|
|
56
|
+
What is genuinely new?
|
|
57
|
+
|
|
58
|
+
| Quality | Example |
|
|
59
|
+
|---------|---------|
|
|
60
|
+
| ❌ Vague | "This fills a gap in the literature." |
|
|
61
|
+
| ✅ Specific | "By embedding AI recommendations within a live AHP process, I isolate the mechanism through which AI shifts weight allocations — something prior work has theorised but not tested experimentally." |
|
|
62
|
+
|
|
63
|
+
### 5. Scope Boundaries
|
|
64
|
+
|
|
65
|
+
What are you explicitly NOT covering?
|
|
66
|
+
|
|
67
|
+
**Example:** "Focus: expert panels in public sector procurement. Excluded: consumer-facing AI recommendations, autonomous systems without human oversight, non-MCDM decision frameworks."
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Depth Score
|
|
72
|
+
|
|
73
|
+
Beyond the checklist, assess intellectual depth (0.0–1.0):
|
|
74
|
+
|
|
75
|
+
### Depth Indicators (presence increases score)
|
|
76
|
+
|
|
77
|
+
| Category | Markers |
|
|
78
|
+
|----------|---------|
|
|
79
|
+
| **Nuance** | "however", "although", "yet", "while", "despite", "conversely" |
|
|
80
|
+
| **Critical thinking** | "gap", "limitation", "critique", "overlooked", "underexplored", "tension" |
|
|
81
|
+
| **Engagement** | "argues", "suggests", "contends", "demonstrates", "challenges", "extends" |
|
|
82
|
+
| **Theory** | "framework", "lens", "perspective", "mechanism", "construct", "typology" |
|
|
83
|
+
| **Methodology** | "method", "approach", "design", "identification", "estimation", "measure" |
|
|
84
|
+
|
|
85
|
+
### Scoring
|
|
86
|
+
|
|
87
|
+
- Count markers per category
|
|
88
|
+
- Normalise by word count (per 100 words)
|
|
89
|
+
- Weight: Nuance (0.25), Critical thinking (0.25), Engagement (0.20), Theory (0.15), Methodology (0.15)
|
|
90
|
+
- **Target:** depth_score ≥ 0.4
|
|
91
|
+
|
|
92
|
+
### Interpretation
|
|
93
|
+
|
|
94
|
+
| Score | Assessment |
|
|
95
|
+
|-------|-----------|
|
|
96
|
+
| < 0.2 | Generic — likely AI-generated or insufficiently developed |
|
|
97
|
+
| 0.2–0.4 | Surface-level — needs more critical engagement |
|
|
98
|
+
| 0.4–0.6 | Adequate — meets minimum for proceeding |
|
|
99
|
+
| 0.6–0.8 | Strong — shows genuine intellectual engagement |
|
|
100
|
+
| > 0.8 | Excellent — deep, nuanced, ready for synthesis |
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Red Flags
|
|
105
|
+
|
|
106
|
+
Flag these phrases — they signal generic or AI-generated concepts:
|
|
107
|
+
|
|
108
|
+
| Red Flag | Problem |
|
|
109
|
+
|----------|---------|
|
|
110
|
+
| "This paper will explore..." | Generic opener |
|
|
111
|
+
| "In recent years..." | Filler, not specific |
|
|
112
|
+
| "With the rise of..." | Cliché |
|
|
113
|
+
| "The purpose of this research is to..." | Formulaic |
|
|
114
|
+
| "fills a gap in the literature" | Overused claim without specificity |
|
|
115
|
+
| "This is an important topic because..." | Assertion without evidence |
|
|
116
|
+
| "Much has been written about..." | Vague attribution |
|
|
117
|
+
|
|
118
|
+
**Response when detected:** "Your concept sounds generic. Use specific details from your reading to establish your voice and demonstrate genuine engagement with the literature."
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## Validation Outcomes
|
|
123
|
+
|
|
124
|
+
### ✅ PASS
|
|
125
|
+
|
|
126
|
+
All requirements met, depth_score ≥ 0.4, no red flags (or red flags are minor).
|
|
127
|
+
|
|
128
|
+
→ Proceed to literature synthesis or drafting.
|
|
129
|
+
|
|
130
|
+
### ⚠️ REVISE
|
|
131
|
+
|
|
132
|
+
1–2 requirements unmet, or 0.2 ≤ depth_score < 0.4.
|
|
133
|
+
|
|
134
|
+
→ Provide specific feedback on what to strengthen. Ask for revision before proceeding.
|
|
135
|
+
|
|
136
|
+
### ❌ FAIL
|
|
137
|
+
|
|
138
|
+
3+ requirements unmet or depth_score < 0.2.
|
|
139
|
+
|
|
140
|
+
→ Concept is not ready. Suggest the user read more in the area, narrow the question, or identify a theoretical framework before returning.
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## How Skills Use This
|
|
145
|
+
|
|
146
|
+
### In `/literature` (before synthesis)
|
|
147
|
+
|
|
148
|
+
1. Request concept plan from user
|
|
149
|
+
2. Run validation checks
|
|
150
|
+
3. If PASS → proceed to search and synthesis
|
|
151
|
+
4. If REVISE/FAIL → return feedback, wait for revision
|
|
152
|
+
|
|
153
|
+
### In writing skills (before drafting)
|
|
154
|
+
1. Check if a validated concept exists (in project's `.planning/` or `CONCEPT.md`)
|
|
155
|
+
2. If not → run validation gate before drafting
|
|
156
|
+
3. If yes → use the concept to guide section structure
|
|
157
|
+
|
|
158
|
+
### In review agents
|
|
159
|
+
|
|
160
|
+
1. Check whether the paper's introduction meets concept validation standards
|
|
161
|
+
2. A paper that would FAIL the concept gate has fundamental framing issues → flag as Critical
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
# Council Protocol
|
|
2
|
+
|
|
3
|
+
> Shared protocol for multi-model council mode. Any review agent or skill can opt into this by providing domain-specific system prompts and output formatting. This file defines the generic orchestration flow.
|
|
4
|
+
>
|
|
5
|
+
> **Included backend:** `cli-council` (local CLI tools, free with existing subscriptions). An optional API backend (`llm-council` via OpenRouter) is available separately — see below.
|
|
6
|
+
|
|
7
|
+
## Core Concept: Cross-Model Agentic Invocation
|
|
8
|
+
|
|
9
|
+
Claude Code can invoke other LLM providers' CLI tools as subprocess reviewers — a different model reviews work that Claude produced, providing genuine architectural diversity. The system is **extensible**: any CLI tool that accepts a prompt and returns text can be wrapped as a backend (~20 lines of Python following the `BackendSpec` pattern in `packages/cli-council/`). Available backends change as subscriptions change; the architecture does not.
|
|
10
|
+
|
|
11
|
+
## What Council Mode Is
|
|
12
|
+
|
|
13
|
+
Council mode coordinates this cross-model capability into a structured 3-stage deliberation:
|
|
14
|
+
|
|
15
|
+
1. **Stage 1: Independent Assessments** — N models (typically 3, each from a different provider) independently evaluate the same artifact using the same instructions
|
|
16
|
+
2. **Stage 2: Anonymised Peer Review** — each model evaluates the others' assessments without knowing which model produced which
|
|
17
|
+
3. **Stage 3: Chairman Synthesis** — a chairman model reads everything and produces the final report
|
|
18
|
+
|
|
19
|
+
The key insight: genuine model diversity (different architectures, training data, biases) surfaces issues that any single model — or even multiple instances of the same model — would miss.
|
|
20
|
+
|
|
21
|
+
## Infrastructure
|
|
22
|
+
|
|
23
|
+
### CLI Backend: `cli-council` (Included)
|
|
24
|
+
|
|
25
|
+
Package: `packages/cli-council/`
|
|
26
|
+
|
|
27
|
+
- `CouncilRunner` — orchestrator that invokes CLI backends via subprocess
|
|
28
|
+
- Pluggable backends: `GeminiBackend`, `ClaudeBackend`, and a dormant `CodexBackend` (OpenAI subscription cancelled Mar 2026; resubscribing would restore it). New backends follow the same `BackendSpec` pattern.
|
|
29
|
+
- `CouncilResult` — Pydantic models for text-based results
|
|
30
|
+
- CLI — `python -m cli_council` for standalone use
|
|
31
|
+
- Uses existing subscriptions — no per-token API costs
|
|
32
|
+
- **Currently active backends:** Gemini (`gemini -p`), Claude (`claude -p`)
|
|
33
|
+
- **Best for:** Ad-hoc reviews, research tasks, quick multi-perspective opinions
|
|
34
|
+
|
|
35
|
+
### API Backend: `llm-council` (Optional, Separate Install)
|
|
36
|
+
|
|
37
|
+
> Not included in this repo. Install separately: `pip install llm-council` or clone from GitHub.
|
|
38
|
+
|
|
39
|
+
- `LLMClient` — generic async OpenRouter client with JSON/text chat and retry logic
|
|
40
|
+
- `CouncilService` — 3-stage orchestration engine with customisable Stage 2/3 prompts
|
|
41
|
+
- `CouncilResult` — Pydantic models for structured JSON results
|
|
42
|
+
- CLI — `python -m llm_council` for standalone use
|
|
43
|
+
- Requires `OPENROUTER_API_KEY` in the environment
|
|
44
|
+
- **Best for:** Automated pipelines, structured JSON output, programmatic integration
|
|
45
|
+
|
|
46
|
+
### Choosing a Backend
|
|
47
|
+
|
|
48
|
+
| Factor | `cli-council` (included) | `llm-council` (separate) |
|
|
49
|
+
|--------|--------------------------|--------------------------|
|
|
50
|
+
| Cost | Subscription-included | Per-token (OpenRouter) |
|
|
51
|
+
| Output format | Free-form text | Structured JSON |
|
|
52
|
+
| Reliability | Variable (CLI output parsing) | High (API contracts) |
|
|
53
|
+
| Speed | Slower (subprocess overhead) | Fast (parallel async HTTP) |
|
|
54
|
+
| Model control | Whatever CLIs support | Full OpenRouter catalogue |
|
|
55
|
+
| Offline | Partially (`claude -p` works offline) | No |
|
|
56
|
+
|
|
57
|
+
**Default:** Use `cli-council` (included and free). Use `llm-council` only if you need structured JSON output or are running in an automated pipeline.
|
|
58
|
+
|
|
59
|
+
## When to Use
|
|
60
|
+
|
|
61
|
+
- Pre-submission quality checks (high stakes)
|
|
62
|
+
- When thoroughness matters more than speed
|
|
63
|
+
- When the user explicitly requests "council mode", "council review", or "thorough review"
|
|
64
|
+
- Never the default — standard single-reviewer mode remains the default for all consumers
|
|
65
|
+
|
|
66
|
+
## Parallel Independent Review
|
|
67
|
+
|
|
68
|
+
Beyond multi-model council mode, review agents can also be launched **in parallel** within a single Claude Code session for maximum coverage from different perspectives:
|
|
69
|
+
|
|
70
|
+
1. **Pre-flight:** Launch `fatal-error-check` first (haiku model, ~15-30 seconds). If it returns FAIL, fix the fatal errors before proceeding.
|
|
71
|
+
2. **Parallel launch:** If the pre-flight passes, launch all three review agents simultaneously in a **single message** with three Agent tool calls:
|
|
72
|
+
- `paper-critic` — adversarial LaTeX audit (grammar, notation, citation, tone, LaTeX, TikZ)
|
|
73
|
+
- `domain-reviewer` — substantive correctness (assumptions, derivations, citations, code-theory, backward logic)
|
|
74
|
+
- `referee2-reviewer` — full Reviewer 2 audit (identification, methods, robustness, presentation, scholarly rigour)
|
|
75
|
+
3. **Synthesise:** Once all three agents return, run `/synthesise-reviews` to cross-reference issues, apply consensus escalation, and produce a unified `REVISION-PLAN.md`.
|
|
76
|
+
|
|
77
|
+
This pattern maximises coverage by combining complementary review perspectives. Each agent has different check dimensions and catches different classes of issues. Parallel launch saves time compared to sequential runs.
|
|
78
|
+
|
|
79
|
+
**When to use parallel review vs council mode:**
|
|
80
|
+
|
|
81
|
+
| Scenario | Use |
|
|
82
|
+
|----------|-----|
|
|
83
|
+
| Maximum coverage from different review perspectives | Parallel independent review |
|
|
84
|
+
| Model diversity (different LLM architectures finding different issues) | Council mode |
|
|
85
|
+
| Both perspectives AND model diversity | Parallel review first, then council mode on the most Critical workstream |
|
|
86
|
+
| Quick pre-submission check | `fatal-error-check` only |
|
|
87
|
+
|
|
88
|
+
## Prerequisites for a Consumer
|
|
89
|
+
|
|
90
|
+
An agent or skill that supports council mode must provide:
|
|
91
|
+
|
|
92
|
+
| What | Where | Purpose |
|
|
93
|
+
|------|-------|---------|
|
|
94
|
+
| **System prompt builder** | Consumer's `references/council-personas.md` | How to construct the system prompt sent to all models |
|
|
95
|
+
| **Output formatter** | Consumer's `references/council-prompts.md` | Stage 3 chairman prompt template + output format |
|
|
96
|
+
| **Council mode section** | Consumer's agent/skill body | Short section noting support + pointer to reference files |
|
|
97
|
+
| **Trigger phrases** | Consumer's frontmatter description/examples | How the user activates council mode |
|
|
98
|
+
|
|
99
|
+
## Orchestration Protocol
|
|
100
|
+
|
|
101
|
+
The **main session** orchestrates council mode. Review agents cannot orchestrate themselves (they lack Bash). When council mode is triggered:
|
|
102
|
+
|
|
103
|
+
### Pre-flight
|
|
104
|
+
|
|
105
|
+
1. Run the consumer's standard pre-checks and hard gates
|
|
106
|
+
2. If any gate fails, report immediately — do not invoke the council (save cost)
|
|
107
|
+
3. Collect all source material (file contents, logs, rubrics) into a system prompt and user message
|
|
108
|
+
4. Read the consumer's reference files for prompt construction guidance
|
|
109
|
+
|
|
110
|
+
### Stage 1: Independent Assessments
|
|
111
|
+
|
|
112
|
+
The main session invokes the `llm-council` package (via CLI or Python script). The library:
|
|
113
|
+
|
|
114
|
+
1. Sends the system prompt + user message to N different LLM models via OpenRouter
|
|
115
|
+
2. Each model independently produces a JSON assessment
|
|
116
|
+
3. All calls are parallel (async)
|
|
117
|
+
4. Failed models are logged and skipped — the council proceeds with available responses
|
|
118
|
+
|
|
119
|
+
**Default models:** `anthropic/claude-sonnet-4.5`, `openai/gpt-5`, `google/gemini-2.5-pro`
|
|
120
|
+
|
|
121
|
+
### Stage 2: Anonymised Peer Review
|
|
122
|
+
|
|
123
|
+
The library automatically:
|
|
124
|
+
|
|
125
|
+
1. Labels Stage 1 assessments as "Assessment A", "Assessment B", etc. (anonymised)
|
|
126
|
+
2. Sends all assessments to each model for cross-evaluation
|
|
127
|
+
3. Each model evaluates the others' work, identifies agreements/disagreements, and provides a ranking
|
|
128
|
+
4. Rankings are parsed and aggregated
|
|
129
|
+
|
|
130
|
+
**Model:** Same models as Stage 1 (each reviews the others' work).
|
|
131
|
+
|
|
132
|
+
### Stage 3: Chairman Synthesis
|
|
133
|
+
|
|
134
|
+
The library:
|
|
135
|
+
|
|
136
|
+
1. Sends all assessments and peer reviews to the chairman model
|
|
137
|
+
2. The chairman considers all inputs and produces a single synthesised response
|
|
138
|
+
3. The response follows the consumer's required output schema
|
|
139
|
+
|
|
140
|
+
**Default chairman:** `anthropic/claude-sonnet-4.5`
|
|
141
|
+
|
|
142
|
+
### Write Output
|
|
143
|
+
|
|
144
|
+
The main session receives the `CouncilResult` JSON and formats it into the consumer's standard output (e.g., `CRITIC-REPORT.md` for paper-critic). The report uses the consumer's standard format with two sections appended:
|
|
145
|
+
|
|
146
|
+
```markdown
|
|
147
|
+
## Council Notes
|
|
148
|
+
|
|
149
|
+
### Agreement Summary
|
|
150
|
+
- [N] issues confirmed by all reviewers
|
|
151
|
+
- [N] issues confirmed by majority
|
|
152
|
+
- [N] issues from single reviewer (validated in cross-review)
|
|
153
|
+
- [N] disputed issues (marked [DISPUTED])
|
|
154
|
+
|
|
155
|
+
### Aggregate Rankings
|
|
156
|
+
| Assessment | Model | Avg Rank | Rankings Count |
|
|
157
|
+
|------------|-------|----------|----------------|
|
|
158
|
+
| Assessment A | [model name] | X.X | N |
|
|
159
|
+
| Assessment B | [model name] | X.X | N |
|
|
160
|
+
| Assessment C | [model name] | X.X | N |
|
|
161
|
+
|
|
162
|
+
## Council Metadata
|
|
163
|
+
- **Mode:** Council ([N] models + peer review + chairman)
|
|
164
|
+
- **Models:** [list of model IDs used]
|
|
165
|
+
- **Chairman:** [chairman model ID]
|
|
166
|
+
- **Timing:** Stage 1: Xms, Stage 2: Xms, Stage 3: Xms, Total: Xms
|
|
167
|
+
- **Date:** YYYY-MM-DD
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
These sections are appended **after** the consumer's standard report content. Downstream consumers (e.g., fixer agent) that parse only the standard sections are unaffected.
|
|
171
|
+
|
|
172
|
+
## CLI Invocation
|
|
173
|
+
|
|
174
|
+
### Option A: CLI Backend (`cli-council` — Included)
|
|
175
|
+
|
|
176
|
+
For ad-hoc reviews using existing subscriptions (no API cost):
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
cd "packages/cli-council"
|
|
180
|
+
uv run python -m cli_council \
|
|
181
|
+
--prompt-file /tmp/council-prompt.txt \
|
|
182
|
+
--context-file /tmp/council-context.txt \
|
|
183
|
+
--output /tmp/council-result.json \
|
|
184
|
+
--output-md /tmp/council-report.md \
|
|
185
|
+
--chairman claude \
|
|
186
|
+
--timeout 180
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
- Write the paper content / review instructions to `--context-file`, and the specific question to `--prompt-file`
|
|
190
|
+
- Output is free-form text — the markdown report (`--output-md`) is usually more useful than JSON
|
|
191
|
+
- The chairman backend defaults to `claude` (since we're already in Claude Code)
|
|
192
|
+
|
|
193
|
+
### Option B: API Backend (`llm-council` — Separate Install)
|
|
194
|
+
|
|
195
|
+
> Requires separate installation: `pip install llm-council` and an `OPENROUTER_API_KEY`.
|
|
196
|
+
|
|
197
|
+
For structured JSON output and automated pipelines:
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
uv run python -m llm_council \
|
|
201
|
+
--system-prompt-file /tmp/council-system.txt \
|
|
202
|
+
--user-message-file /tmp/council-user.txt \
|
|
203
|
+
--models "anthropic/claude-sonnet-4.5,openai/gpt-5,google/gemini-2.5-pro" \
|
|
204
|
+
--chairman "anthropic/claude-sonnet-4.5" \
|
|
205
|
+
--output /tmp/council-result.json
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
For advanced cases (custom Stage 2/3 prompts), write a small Python script that imports `llm_council` and calls `CouncilService.run_council()` with `stage2_system` and `stage3_prompt_builder` parameters.
|
|
209
|
+
|
|
210
|
+
## Issue Resolution Rules (Chairman)
|
|
211
|
+
|
|
212
|
+
The consumer's chairman prompt should instruct the chairman to apply these rules:
|
|
213
|
+
|
|
214
|
+
| Situation | Action |
|
|
215
|
+
|-----------|--------|
|
|
216
|
+
| Issue confirmed by 2+ models | Retain at the **highest** agreed severity |
|
|
217
|
+
| Issue from 1 model, validated in peer review | Retain at the original severity |
|
|
218
|
+
| Issue from 1 model, disputed in peer review | Retain with `[DISPUTED]` tag; chairman makes final severity call |
|
|
219
|
+
| Issue found only in peer review (missed initially) | Add as a new finding |
|
|
220
|
+
| Conflicting severity assessments | Chairman decides; notes the range in the issue description |
|
|
221
|
+
|
|
222
|
+
**Scoring:** The chairman produces an independent score informed by all inputs — not a mechanical average.
|
|
223
|
+
|
|
224
|
+
## Model Configuration
|
|
225
|
+
|
|
226
|
+
| Parameter | Built-in Default | Override |
|
|
227
|
+
|-----------|-----------------|---------|
|
|
228
|
+
| Stage 1 models | `anthropic/claude-sonnet-4.5`, `openai/gpt-5`, `google/gemini-2.5-pro` | `--models` CLI flag or user config |
|
|
229
|
+
| Chairman model | `anthropic/claude-sonnet-4.5` | `--chairman` CLI flag or user config |
|
|
230
|
+
| Max tokens | 4096 | `--max-tokens` CLI flag |
|
|
231
|
+
|
|
232
|
+
**User defaults** persist to `~/.config/llm-council/config.json` and override built-in defaults. Manage via `llm-council models --set-defaults` / `--set-chairman` / `--reset`, or interactively with `llm-council models --pricing` to review options first.
|
|
233
|
+
|
|
234
|
+
The library's `config.py` contains the full model registry (17 models across Anthropic, OpenAI, Google) with tiers and live pricing.
|
|
235
|
+
|
|
236
|
+
## Cost Considerations
|
|
237
|
+
|
|
238
|
+
Council mode costs significantly more than standard mode because it calls N models for Stage 1, N models for Stage 2, and 1 model for Stage 3 (total: 2N+1 API calls). With 3 models:
|
|
239
|
+
|
|
240
|
+
- **Standard mode:** 1 agent call (free — uses Claude Code context)
|
|
241
|
+
- **Council mode:** 7 OpenRouter API calls (3 + 3 + 1)
|
|
242
|
+
|
|
243
|
+
Pricing depends on the models chosen. Check OpenRouter for current rates. Use council mode when thoroughness justifies the cost — typically pre-submission or high-stakes reviews.
|
|
244
|
+
|
|
245
|
+
## Persona Support (Optional)
|
|
246
|
+
|
|
247
|
+
Each consumer can define **personas** in `references/council-personas.md` — distinct reviewer emphases that are prepended to the system prompt. Since council mode already uses different LLM providers (which bring natural perspective diversity), personas are optional but can add further differentiation.
|
|
248
|
+
|
|
249
|
+
Current approach: the same system prompt goes to all models. Personas are documented as reference material describing what each model *tends to focus on* based on its architecture. Future extension: per-model system prompt variants via the library's API.
|
|
250
|
+
|
|
251
|
+
## Consumers
|
|
252
|
+
|
|
253
|
+
| Consumer | CLI (`cli-council`) | API (`llm-council`) | Notes |
|
|
254
|
+
|----------|---------------------|---------------------|-------|
|
|
255
|
+
| `paper-critic` | Supported | Implemented | First consumer — Technical Rigour, Presentation, Scholarly Standards personas |
|
|
256
|
+
| `referee2-reviewer` | Supported | Supported | 5-audit protocol + council cross-review — highest-value consumer |
|
|
257
|
+
| `domain-reviewer` | Supported | — | Math/assumption checking — different models catch different derivation gaps |
|
|
258
|
+
| `proposal-reviewer` | Supported | — | Feasibility and novelty — different models have different domain knowledge |
|
|
259
|
+
| `peer-reviewer` | Supported | — | Full paper review — the canonical use case for multi-model deliberation |
|
|
260
|
+
| `multi-perspective` | Supported | — | Replaces Claude-only sub-agents with genuine model diversity |
|
|
261
|
+
| `literature` | Implemented | — | Phase 2b (search) and Phase 7 (synthesis) — see skill definition |
|
|
262
|
+
| `devils-advocate` | Supported | — | Round 1/2/3 played by different models for genuine adversarial tension |
|
|
263
|
+
| `proofread` | Supported | — | Lower value — most useful for notation consistency and citation voice balance |
|
|
264
|
+
| `code-review` | Supported | — | Most valuable for domain correctness and cross-language verification |
|
|
265
|
+
| `bib-validate` | Supported | — | Different models have different bibliographic knowledge — catches metadata mismatches |
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# Distribution Diagnostics Before Model Selection
|
|
2
|
+
|
|
3
|
+
> Shared reference for `/data-analysis` and review agents. Mandatory checks on dependent variables before selecting a statistical model. Prevents misspecification. Adapted from CommDAAF AgentAcademy protocol (Xu 2026).
|
|
4
|
+
|
|
5
|
+
## Principle
|
|
6
|
+
|
|
7
|
+
**Never run a regression without inspecting the DV distribution first.** OLS on count data, Poisson on overdispersed data, and linear models on zero-inflated outcomes all produce misleading results. Five minutes of diagnostics prevents weeks of wasted analysis.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Mandatory Checks
|
|
12
|
+
|
|
13
|
+
Run these on every dependent variable before model selection:
|
|
14
|
+
|
|
15
|
+
| Diagnostic | What to compute | Why it matters |
|
|
16
|
+
|-----------|----------------|----------------|
|
|
17
|
+
| **Basic stats** | N, mean, median, SD, range | Understand the variable |
|
|
18
|
+
| **Skewness** | `scipy.stats.skew(y)` or `moments::skewness(y)` | \|skew\| > 1 → OLS assumptions likely violated |
|
|
19
|
+
| **Zero proportion** | `sum(y == 0) / N` | > 15% zeros → consider zero-inflated or hurdle models (the decision tree and helper functions below only switch to them automatically above 30%) |
|
|
20
|
+
| **Overdispersion** | `var(y) / mean(y)` | > 1.5 → Poisson is wrong (it assumes variance = mean, so standard errors are understated); use Negative Binomial |
|
|
21
|
+
| **Normality** | QQ-plot + Shapiro-Wilk (if N < 5000) | Formal test, but visual inspection matters more |
|
|
22
|
+
| **Outliers** | IQR method or robust Mahalanobis distance | Extreme values can dominate OLS estimates |
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Model Selection Decision Tree
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
Is the DV a count (0, 1, 2, ...)?
|
|
30
|
+
├── Yes → Check overdispersion (var/mean > 1.5?)
|
|
31
|
+
│ ├── Yes → Check zero proportion (> 30%?)
|
|
32
|
+
│ │ ├── Yes → Zero-inflated NB or Hurdle model
|
|
33
|
+
│ │ └── No → Negative Binomial
|
|
34
|
+
│ └── No → Check zero proportion (> 30%?)
|
|
35
|
+
│ ├── Yes → Zero-inflated Poisson
|
|
36
|
+
│ └── No → Poisson
|
|
37
|
+
├── Is the DV a proportion or bounded [0, 1]?
|
|
38
|
+
│ └── Yes → Beta regression (or fractional logit)
|
|
39
|
+
├── Is the DV binary (0/1)?
|
|
40
|
+
│ └── Yes → Logistic regression
|
|
41
|
+
├── Is the DV ordinal (ordered categories)?
|
|
42
|
+
│ └── Yes → Ordered logistic/probit
|
|
43
|
+
└── Is the DV continuous?
|
|
44
|
+
└── Check skewness and normality of residuals
|
|
45
|
+
├── Residuals ~normal → OLS
|
|
46
|
+
├── Highly skewed DV → Log-transform, then OLS (report both raw and log-transformed results)
|
|
47
|
+
└── Heavy tails → Robust regression or quantile regression
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**Key rule:** Never use OLS on raw counts without explicit justification. Social media engagement, citation counts, survey response counts — these are almost never normally distributed.
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## Effect Size Reporting
|
|
55
|
+
|
|
56
|
+
### For count models (NB, Poisson): report Incidence Rate Ratios (IRR)
|
|
57
|
+
|
|
58
|
+
| IRR | Interpretation |
|
|
59
|
+
|-----|---------------|
|
|
60
|
+
| 1.0 | No effect |
|
|
61
|
+
| 1.2 | 20% increase |
|
|
62
|
+
| 1.5 | 50% increase |
|
|
63
|
+
| 2.0 | Double |
|
|
64
|
+
| 0.5 | Half |
|
|
65
|
+
|
|
66
|
+
**Always translate to practical meaning:** "Posts with frame X received 50% more engagement (IRR = 1.50, 95% CI [1.22, 1.84])" — not just "β = 0.41, p < 0.01".
|
|
67
|
+
|
|
68
|
+
### For OLS: report standardised coefficients alongside raw
|
|
69
|
+
|
|
70
|
+
Help readers judge magnitude, not just significance.
|
|
71
|
+
|
|
72
|
+
### For logistic: report odds ratios AND predicted probabilities
|
|
73
|
+
|
|
74
|
+
Odds ratios are hard to interpret. Show predicted probability at meaningful values of the IV.
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Multiple Testing
|
|
79
|
+
|
|
80
|
+
When testing multiple predictors or outcomes:
|
|
81
|
+
|
|
82
|
+
| Method | When to use |
|
|
83
|
+
|--------|------------|
|
|
84
|
+
| **Bonferroni** | Conservative; few tests (< 10) |
|
|
85
|
+
| **Holm** | Less conservative; sequential rejection |
|
|
86
|
+
| **FDR (Benjamini-Hochberg)** | Many tests (≥ 10); controls false discovery rate |
|
|
87
|
+
|
|
88
|
+
**Always report both raw and adjusted p-values.** Let readers assess.
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## Implementation
|
|
93
|
+
|
|
94
|
+
### Python
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
import numpy as np
|
|
98
|
+
from scipy import stats
|
|
99
|
+
|
|
100
|
+
def distribution_diagnostics(y, name="DV"):
    """Run mandatory diagnostics before model selection.

    Parameters
    ----------
    y : array-like
        Observed values of the variable (list, tuple, NumPy array, ...).
        Converted to a NumPy array so that element-wise comparisons also
        work for plain Python sequences.
    name : str, optional
        Label for the variable. Accepted for interface compatibility; it is
        not used in the computation.

    Returns
    -------
    dict
        Keys ``n``, ``mean``, ``median``, ``sd``, ``skewness``,
        ``pct_zeros`` (a percentage, 0-100), ``var_mean_ratio`` and
        ``recommendation`` (suggested model family as a string).

    Raises
    ------
    ValueError
        If ``y`` contains no observations.
    """
    values = np.atleast_1d(np.asarray(y))
    n = len(values)
    if n == 0:
        raise ValueError("y must contain at least one observation")

    mean = np.mean(values)
    skewness = stats.skew(values)
    # Compare on the array: on a plain list `y == 0` is a single bool,
    # which would silently report 0% zeros.
    pct_zeros = np.sum(values == 0) / n * 100
    # The variance-to-mean ratio is undefined for a non-positive mean;
    # report infinity in that case instead of dividing.
    var_mean = np.var(values) / mean if mean > 0 else float('inf')

    diagnostics = {
        'n': n, 'mean': mean, 'median': np.median(values),
        'sd': np.std(values), 'skewness': skewness,
        'pct_zeros': pct_zeros, 'var_mean_ratio': var_mean,
    }

    # Model recommendation: the zero share is checked first, then
    # overdispersion, then skewness; OLS is the fallback.
    if pct_zeros > 30:
        diagnostics['recommendation'] = 'Zero-inflated model or Hurdle'
    elif var_mean > 1.5:
        diagnostics['recommendation'] = 'Negative Binomial'
    elif abs(skewness) > 1:
        diagnostics['recommendation'] = 'Log-transform or GLM'
    else:
        diagnostics['recommendation'] = 'OLS (verify residuals)'

    return diagnostics
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### R
|
|
127
|
+
|
|
128
|
+
```r
|
|
129
|
+
#' Print distribution diagnostics and a suggested model family
#'
#' @param y Vector holding the variable to inspect. Missing values are
#'   dropped (with a warning) before any statistic is computed.
#' @param name Label printed in the report header.
#' @return Invisibly, a list with elements `n`, `mean`, `median`, `sd`,
#'   `skewness`, `pct_zeros` (a percentage, 0-100), `var_mean_ratio` and
#'   `recommendation`.
distribution_diagnostics <- function(y, name = "DV") {
  n_missing <- sum(is.na(y))
  if (n_missing > 0) {
    warning(
      sprintf("Dropping %d missing value(s) from %s", n_missing, name),
      call. = FALSE
    )
    y <- y[!is.na(y)]
  }

  n <- length(y)
  if (n == 0) {
    stop("`y` must contain at least one non-missing value", call. = FALSE)
  }

  y_mean <- mean(y)
  skew <- moments::skewness(y)
  pct_zeros <- sum(y == 0) / n * 100
  # The variance-to-mean ratio is undefined for a non-positive mean;
  # report Inf in that case instead of dividing.
  var_mean <- if (y_mean > 0) var(y) / y_mean else Inf

  cat(sprintf("=== %s (N=%d) ===\n", name, n))
  cat(sprintf(
    "Mean: %.3f | Median: %.3f | SD: %.3f\n",
    y_mean, median(y), sd(y)
  ))
  cat(sprintf(
    "Skewness: %.3f | Zeros: %.1f%% | Var/Mean: %.3f\n",
    skew, pct_zeros, var_mean
  ))

  # isTRUE() lets an NA/NaN statistic (e.g. skewness of a constant vector)
  # fall through to the next rule instead of aborting the `if`.
  recommendation <- if (pct_zeros > 30) {
    "Zero-inflated or Hurdle model"
  } else if (isTRUE(var_mean > 1.5)) {
    "Negative Binomial"
  } else if (isTRUE(abs(skew) > 1)) {
    "Log-transform or GLM"
  } else {
    "OLS (verify residuals)"
  }
  cat(sprintf("→ %s\n", recommendation))

  invisible(list(
    n = n, mean = y_mean, median = median(y), sd = sd(y),
    skewness = skew, pct_zeros = pct_zeros, var_mean_ratio = var_mean,
    recommendation = recommendation
  ))
}
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## Integration
|
|
149
|
+
|
|
150
|
+
### In `/data-analysis` Phase 1 (EDA)
|
|
151
|
+
|
|
152
|
+
Run `distribution_diagnostics()` on every DV and key IVs before proceeding to estimation. If the diagnostics suggest a non-OLS model, flag this before the user locks their specification.
|
|
153
|
+
|
|
154
|
+
### In review agents
|
|
155
|
+
|
|
156
|
+
Check whether the paper reports distribution diagnostics or justifies its model choice. A paper using OLS on count data without justification → flag as a Major issue.
|
|
157
|
+
|
|
158
|
+
### Validation tier interaction
|
|
159
|
+
|
|
160
|
+
| Tier | Requirement |
|
|
161
|
+
|------|------------|
|
|
162
|
+
| 🟢 Exploratory | Run diagnostics, note recommendation |
|
|
163
|
+
| 🟡 Pilot | Run diagnostics, justify model choice in notes |
|
|
164
|
+
| 🔴 Publication | Run diagnostics, report in paper, compare 2+ model families |
|